Language analyzers

A set of analyzers aimed at analyzing text in a specific language. The following types are supported: arabic, armenian, basque, bengali, brazilian, bulgarian, catalan, cjk, czech, danish, dutch, english, estonian, finnish, french, galician, german, greek, hindi, hungarian, indonesian, irish, italian, latvian, lithuanian, norwegian, persian, portuguese, romanian, russian, serbian, sorani, spanish, swedish, turkish, thai

Configuring language analyzers

Stopwords

All analyzers support setting custom stopwords, either internally in the configuration or by using an external stopwords file. See the Stop analyzer for more details.
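For example, the stopword list of a built-in language analyzer can be replaced when it is configured on an index. A minimal sketch in Python, using the same client object as the examples below; the index name my_index and the word list are purely illustrative:

  resp = client.indices.create(
      index="my_index",
      settings={
          "analysis": {
              "analyzer": {
                  "my_english": {
                      "type": "english",
                      # Replace the default _english_ stopword list; a
                      # "stopwords_path" pointing to a file would also work.
                      "stopwords": ["a", "an", "the"]
                  }
              }
          }
      },
  )
  print(resp)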

Excluding words from stemming

The stem_exclusion parameter allows you to specify an array of lowercase words that should not be stemmed. Internally, this functionality is implemented by adding the keyword_marker token filter with its keywords set to the value of the stem_exclusion parameter.

The following analyzers support setting a custom stem_exclusion list: arabic, armenian, basque, bengali, bulgarian, catalan, czech, dutch, english, finnish, french, galician, german, hindi, hungarian, indonesian, irish, italian, latvian, lithuanian, norwegian, portuguese, romanian, russian, serbian, sorani, spanish, swedish, turkish
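As an illustration of the stem_exclusion setting (the index name and word list here are assumptions, not taken from the original), the english analyzer could be configured like this in Python:

  resp = client.indices.create(
      index="stem_exclusion_example",
      settings={
          "analysis": {
              "analyzer": {
                  "my_english": {
                      "type": "english",
                      # These lowercase words are marked as keywords and left untouched by the stemmer.
                      "stem_exclusion": ["organization", "organizations"]
                  }
              }
          }
      },
  )
  print(resp)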

Reimplementing language analyzers

The built-in language analyzers can be reimplemented as custom analyzers (described below) in order to customize their behaviour.

If you do not intend to exclude words from being stemmed (the equivalent of the stem_exclusion parameter above), you should remove the keyword_marker token filter from the custom analyzer configuration.

Arabic analyzer

The arabic analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="arabic_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "arabic_stop": {
  7. "type": "stop",
  8. "stopwords": "_arabic_"
  9. },
  10. "arabic_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "مثال"
  14. ]
  15. },
  16. "arabic_stemmer": {
  17. "type": "stemmer",
  18. "language": "arabic"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_arabic": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "arabic_stop",
  28. "arabic_normalization",
  29. "arabic_keywords",
  30. "arabic_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. },
  36. )
  37. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'arabic_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. arabic_stop: {
  8. type: 'stop',
  9. stopwords: '_arabic_'
  10. },
  11. arabic_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'مثال'
  15. ]
  16. },
  17. arabic_stemmer: {
  18. type: 'stemmer',
  19. language: 'arabic'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_arabic: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'arabic_stop',
  29. 'arabic_normalization',
  30. 'arabic_keywords',
  31. 'arabic_stemmer'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response

Js

  1. const response = await client.indices.create({
  2. index: "arabic_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. arabic_stop: {
  7. type: "stop",
  8. stopwords: "_arabic_",
  9. },
  10. arabic_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["مثال"],
  13. },
  14. arabic_stemmer: {
  15. type: "stemmer",
  16. language: "arabic",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_arabic: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "arabic_stop",
  26. "arabic_normalization",
  27. "arabic_keywords",
  28. "arabic_stemmer",
  29. ],
  30. },
  31. },
  32. },
  33. },
  34. });
  35. console.log(response);

Console

  1. PUT /arabic_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "arabic_stop": {
  7. "type": "stop",
  8. "stopwords": "_arabic_"
  9. },
  10. "arabic_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["مثال"]
  13. },
  14. "arabic_stemmer": {
  15. "type": "stemmer",
  16. "language": "arabic"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_arabic": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "arabic_stop",
  26. "arabic_normalization",
  27. "arabic_keywords",
  28. "arabic_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. }
  34. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The arabic_keywords filter should be removed unless there are words which should be excluded from stemming.
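To check what a rebuilt analyzer actually produces, the _analyze API can be run against the index created above. A minimal sketch in Python; the sample text is arbitrary:

  resp = client.indices.analyze(
      index="arabic_example",
      analyzer="rebuilt_arabic",
      text="مثال على التحليل",  # any Arabic sentence will do
  )
  # Each entry in resp["tokens"] carries the token text, position and offsets.
  for token in resp["tokens"]:
      print(token["token"])

The same check applies to any of the rebuilt analyzers in the sections that follow.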

Armenian analyzer

The armenian analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="armenian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "armenian_stop": {
  7. "type": "stop",
  8. "stopwords": "_armenian_"
  9. },
  10. "armenian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "օրինակ"
  14. ]
  15. },
  16. "armenian_stemmer": {
  17. "type": "stemmer",
  18. "language": "armenian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_armenian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "armenian_stop",
  27. "armenian_keywords",
  28. "armenian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'armenian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. armenian_stop: {
  8. type: 'stop',
  9. stopwords: '_armenian_'
  10. },
  11. armenian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'օրինակ'
  15. ]
  16. },
  17. armenian_stemmer: {
  18. type: 'stemmer',
  19. language: 'armenian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_armenian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'armenian_stop',
  28. 'armenian_keywords',
  29. 'armenian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "armenian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. armenian_stop: {
  7. type: "stop",
  8. stopwords: "_armenian_",
  9. },
  10. armenian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["օրինակ"],
  13. },
  14. armenian_stemmer: {
  15. type: "stemmer",
  16. language: "armenian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_armenian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "armenian_stop",
  25. "armenian_keywords",
  26. "armenian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /armenian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "armenian_stop": {
  7. "type": "stop",
  8. "stopwords": "_armenian_"
  9. },
  10. "armenian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["օրինակ"]
  13. },
  14. "armenian_stemmer": {
  15. "type": "stemmer",
  16. "language": "armenian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_armenian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "armenian_stop",
  25. "armenian_keywords",
  26. "armenian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The armenian_keywords filter should be removed unless there are words which should be excluded from stemming.

Basque analyzer

The basque analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="basque_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "basque_stop": {
  7. "type": "stop",
  8. "stopwords": "_basque_"
  9. },
  10. "basque_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "Adibidez"
  14. ]
  15. },
  16. "basque_stemmer": {
  17. "type": "stemmer",
  18. "language": "basque"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_basque": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "basque_stop",
  27. "basque_keywords",
  28. "basque_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'basque_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. basque_stop: {
  8. type: 'stop',
  9. stopwords: '_basque_'
  10. },
  11. basque_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'Adibidez'
  15. ]
  16. },
  17. basque_stemmer: {
  18. type: 'stemmer',
  19. language: 'basque'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_basque: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'basque_stop',
  28. 'basque_keywords',
  29. 'basque_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "basque_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. basque_stop: {
  7. type: "stop",
  8. stopwords: "_basque_",
  9. },
  10. basque_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["Adibidez"],
  13. },
  14. basque_stemmer: {
  15. type: "stemmer",
  16. language: "basque",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_basque: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "basque_stop",
  25. "basque_keywords",
  26. "basque_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /basque_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "basque_stop": {
  7. "type": "stop",
  8. "stopwords": "_basque_"
  9. },
  10. "basque_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["Adibidez"]
  13. },
  14. "basque_stemmer": {
  15. "type": "stemmer",
  16. "language": "basque"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_basque": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "basque_stop",
  25. "basque_keywords",
  26. "basque_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The basque_keywords filter should be removed unless there are words which should be excluded from stemming.

Bengali analyzer

The bengali analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="bengali_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "bengali_stop": {
  7. "type": "stop",
  8. "stopwords": "_bengali_"
  9. },
  10. "bengali_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "উদাহরণ"
  14. ]
  15. },
  16. "bengali_stemmer": {
  17. "type": "stemmer",
  18. "language": "bengali"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_bengali": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "bengali_keywords",
  28. "indic_normalization",
  29. "bengali_normalization",
  30. "bengali_stop",
  31. "bengali_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. },
  37. )
  38. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'bengali_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. bengali_stop: {
  8. type: 'stop',
  9. stopwords: '_bengali_'
  10. },
  11. bengali_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'উদাহরণ'
  15. ]
  16. },
  17. bengali_stemmer: {
  18. type: 'stemmer',
  19. language: 'bengali'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_bengali: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'bengali_keywords',
  29. 'indic_normalization',
  30. 'bengali_normalization',
  31. 'bengali_stop',
  32. 'bengali_stemmer'
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }
  39. )
  40. puts response

Js

  1. const response = await client.indices.create({
  2. index: "bengali_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. bengali_stop: {
  7. type: "stop",
  8. stopwords: "_bengali_",
  9. },
  10. bengali_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["উদাহরণ"],
  13. },
  14. bengali_stemmer: {
  15. type: "stemmer",
  16. language: "bengali",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_bengali: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "bengali_keywords",
  26. "indic_normalization",
  27. "bengali_normalization",
  28. "bengali_stop",
  29. "bengali_stemmer",
  30. ],
  31. },
  32. },
  33. },
  34. },
  35. });
  36. console.log(response);

Console

  1. PUT /bengali_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "bengali_stop": {
  7. "type": "stop",
  8. "stopwords": "_bengali_"
  9. },
  10. "bengali_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["উদাহরণ"]
  13. },
  14. "bengali_stemmer": {
  15. "type": "stemmer",
  16. "language": "bengali"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_bengali": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "bengali_keywords",
  26. "indic_normalization",
  27. "bengali_normalization",
  28. "bengali_stop",
  29. "bengali_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The bengali_keywords filter should be removed unless there are words which should be excluded from stemming.

Brazilian Portuguese analyzer

The brazilian analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="brazilian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "brazilian_stop": {
  7. "type": "stop",
  8. "stopwords": "_brazilian_"
  9. },
  10. "brazilian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "brazilian_stemmer": {
  17. "type": "stemmer",
  18. "language": "brazilian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_brazilian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "brazilian_stop",
  27. "brazilian_keywords",
  28. "brazilian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'brazilian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. brazilian_stop: {
  8. type: 'stop',
  9. stopwords: '_brazilian_'
  10. },
  11. brazilian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. brazilian_stemmer: {
  18. type: 'stemmer',
  19. language: 'brazilian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_brazilian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'brazilian_stop',
  28. 'brazilian_keywords',
  29. 'brazilian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "brazilian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. brazilian_stop: {
  7. type: "stop",
  8. stopwords: "_brazilian_",
  9. },
  10. brazilian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. brazilian_stemmer: {
  15. type: "stemmer",
  16. language: "brazilian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_brazilian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "brazilian_stop",
  25. "brazilian_keywords",
  26. "brazilian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /brazilian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "brazilian_stop": {
  7. "type": "stop",
  8. "stopwords": "_brazilian_"
  9. },
  10. "brazilian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "brazilian_stemmer": {
  15. "type": "stemmer",
  16. "language": "brazilian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_brazilian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "brazilian_stop",
  25. "brazilian_keywords",
  26. "brazilian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The brazilian_keywords filter should be removed unless there are words which should be excluded from stemming.

Bulgarian analyzer

The bulgarian analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="bulgarian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "bulgarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_bulgarian_"
  9. },
  10. "bulgarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "bulgarian_stemmer": {
  17. "type": "stemmer",
  18. "language": "bulgarian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_bulgarian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "bulgarian_stop",
  27. "bulgarian_keywords",
  28. "bulgarian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'bulgarian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. bulgarian_stop: {
  8. type: 'stop',
  9. stopwords: '_bulgarian_'
  10. },
  11. bulgarian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. bulgarian_stemmer: {
  18. type: 'stemmer',
  19. language: 'bulgarian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_bulgarian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'bulgarian_stop',
  28. 'bulgarian_keywords',
  29. 'bulgarian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "bulgarian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. bulgarian_stop: {
  7. type: "stop",
  8. stopwords: "_bulgarian_",
  9. },
  10. bulgarian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. bulgarian_stemmer: {
  15. type: "stemmer",
  16. language: "bulgarian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_bulgarian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "bulgarian_stop",
  25. "bulgarian_keywords",
  26. "bulgarian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /bulgarian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "bulgarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_bulgarian_"
  9. },
  10. "bulgarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "bulgarian_stemmer": {
  15. "type": "stemmer",
  16. "language": "bulgarian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_bulgarian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "bulgarian_stop",
  25. "bulgarian_keywords",
  26. "bulgarian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The bulgarian_keywords filter should be removed unless there are words which should be excluded from stemming.

Catalan analyzer

The catalan analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="catalan_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "catalan_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "d",
  10. "l",
  11. "m",
  12. "n",
  13. "s",
  14. "t"
  15. ],
  16. "articles_case": True
  17. },
  18. "catalan_stop": {
  19. "type": "stop",
  20. "stopwords": "_catalan_"
  21. },
  22. "catalan_keywords": {
  23. "type": "keyword_marker",
  24. "keywords": [
  25. "example"
  26. ]
  27. },
  28. "catalan_stemmer": {
  29. "type": "stemmer",
  30. "language": "catalan"
  31. }
  32. },
  33. "analyzer": {
  34. "rebuilt_catalan": {
  35. "tokenizer": "standard",
  36. "filter": [
  37. "catalan_elision",
  38. "lowercase",
  39. "catalan_stop",
  40. "catalan_keywords",
  41. "catalan_stemmer"
  42. ]
  43. }
  44. }
  45. }
  46. },
  47. )
  48. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'catalan_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. catalan_elision: {
  8. type: 'elision',
  9. articles: [
  10. 'd',
  11. 'l',
  12. 'm',
  13. 'n',
  14. 's',
  15. 't'
  16. ],
  17. articles_case: true
  18. },
  19. catalan_stop: {
  20. type: 'stop',
  21. stopwords: '_catalan_'
  22. },
  23. catalan_keywords: {
  24. type: 'keyword_marker',
  25. keywords: [
  26. 'example'
  27. ]
  28. },
  29. catalan_stemmer: {
  30. type: 'stemmer',
  31. language: 'catalan'
  32. }
  33. },
  34. analyzer: {
  35. rebuilt_catalan: {
  36. tokenizer: 'standard',
  37. filter: [
  38. 'catalan_elision',
  39. 'lowercase',
  40. 'catalan_stop',
  41. 'catalan_keywords',
  42. 'catalan_stemmer'
  43. ]
  44. }
  45. }
  46. }
  47. }
  48. }
  49. )
  50. puts response

Js

  1. const response = await client.indices.create({
  2. index: "catalan_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. catalan_elision: {
  7. type: "elision",
  8. articles: ["d", "l", "m", "n", "s", "t"],
  9. articles_case: true,
  10. },
  11. catalan_stop: {
  12. type: "stop",
  13. stopwords: "_catalan_",
  14. },
  15. catalan_keywords: {
  16. type: "keyword_marker",
  17. keywords: ["example"],
  18. },
  19. catalan_stemmer: {
  20. type: "stemmer",
  21. language: "catalan",
  22. },
  23. },
  24. analyzer: {
  25. rebuilt_catalan: {
  26. tokenizer: "standard",
  27. filter: [
  28. "catalan_elision",
  29. "lowercase",
  30. "catalan_stop",
  31. "catalan_keywords",
  32. "catalan_stemmer",
  33. ],
  34. },
  35. },
  36. },
  37. },
  38. });
  39. console.log(response);

Console

  1. PUT /catalan_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "catalan_elision": {
  7. "type": "elision",
  8. "articles": [ "d", "l", "m", "n", "s", "t"],
  9. "articles_case": true
  10. },
  11. "catalan_stop": {
  12. "type": "stop",
  13. "stopwords": "_catalan_"
  14. },
  15. "catalan_keywords": {
  16. "type": "keyword_marker",
  17. "keywords": ["example"]
  18. },
  19. "catalan_stemmer": {
  20. "type": "stemmer",
  21. "language": "catalan"
  22. }
  23. },
  24. "analyzer": {
  25. "rebuilt_catalan": {
  26. "tokenizer": "standard",
  27. "filter": [
  28. "catalan_elision",
  29. "lowercase",
  30. "catalan_stop",
  31. "catalan_keywords",
  32. "catalan_stemmer"
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The catalan_keywords filter should be removed unless there are words which should be excluded from stemming.

CJK analyzer

You may find that the icu_analyzer from the ICU analysis plugin works better for CJK text than the cjk analyzer. Experiment with your text and queries.
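One way to compare the two is to run the same text through both analyzers with the _analyze API. A rough sketch in Python, assuming the analysis-icu plugin is installed (the icu_analyzer request fails without it); the sample text is arbitrary:

  sample = "東京都に住んでいます"  # arbitrary CJK sample text

  for name in ("cjk", "icu_analyzer"):
      resp = client.indices.analyze(analyzer=name, text=sample)
      print(name, [t["token"] for t in resp["tokens"]])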

The cjk analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="cjk_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": [
  9. "a",
  10. "and",
  11. "are",
  12. "as",
  13. "at",
  14. "be",
  15. "but",
  16. "by",
  17. "for",
  18. "if",
  19. "in",
  20. "into",
  21. "is",
  22. "it",
  23. "no",
  24. "not",
  25. "of",
  26. "on",
  27. "or",
  28. "s",
  29. "such",
  30. "t",
  31. "that",
  32. "the",
  33. "their",
  34. "then",
  35. "there",
  36. "these",
  37. "they",
  38. "this",
  39. "to",
  40. "was",
  41. "will",
  42. "with",
  43. "www"
  44. ]
  45. }
  46. },
  47. "analyzer": {
  48. "rebuilt_cjk": {
  49. "tokenizer": "standard",
  50. "filter": [
  51. "cjk_width",
  52. "lowercase",
  53. "cjk_bigram",
  54. "english_stop"
  55. ]
  56. }
  57. }
  58. }
  59. },
  60. )
  61. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'cjk_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. english_stop: {
  8. type: 'stop',
  9. stopwords: [
  10. 'a',
  11. 'and',
  12. 'are',
  13. 'as',
  14. 'at',
  15. 'be',
  16. 'but',
  17. 'by',
  18. 'for',
  19. 'if',
  20. 'in',
  21. 'into',
  22. 'is',
  23. 'it',
  24. 'no',
  25. 'not',
  26. 'of',
  27. 'on',
  28. 'or',
  29. 's',
  30. 'such',
  31. 't',
  32. 'that',
  33. 'the',
  34. 'their',
  35. 'then',
  36. 'there',
  37. 'these',
  38. 'they',
  39. 'this',
  40. 'to',
  41. 'was',
  42. 'will',
  43. 'with',
  44. 'www'
  45. ]
  46. }
  47. },
  48. analyzer: {
  49. rebuilt_cjk: {
  50. tokenizer: 'standard',
  51. filter: [
  52. 'cjk_width',
  53. 'lowercase',
  54. 'cjk_bigram',
  55. 'english_stop'
  56. ]
  57. }
  58. }
  59. }
  60. }
  61. }
  62. )
  63. puts response

Js

  1. const response = await client.indices.create({
  2. index: "cjk_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. english_stop: {
  7. type: "stop",
  8. stopwords: [
  9. "a",
  10. "and",
  11. "are",
  12. "as",
  13. "at",
  14. "be",
  15. "but",
  16. "by",
  17. "for",
  18. "if",
  19. "in",
  20. "into",
  21. "is",
  22. "it",
  23. "no",
  24. "not",
  25. "of",
  26. "on",
  27. "or",
  28. "s",
  29. "such",
  30. "t",
  31. "that",
  32. "the",
  33. "their",
  34. "then",
  35. "there",
  36. "these",
  37. "they",
  38. "this",
  39. "to",
  40. "was",
  41. "will",
  42. "with",
  43. "www",
  44. ],
  45. },
  46. },
  47. analyzer: {
  48. rebuilt_cjk: {
  49. tokenizer: "standard",
  50. filter: ["cjk_width", "lowercase", "cjk_bigram", "english_stop"],
  51. },
  52. },
  53. },
  54. },
  55. });
  56. console.log(response);

Console

  1. PUT /cjk_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": [
  9. "a", "and", "are", "as", "at", "be", "but", "by", "for",
  10. "if", "in", "into", "is", "it", "no", "not", "of", "on",
  11. "or", "s", "such", "t", "that", "the", "their", "then",
  12. "there", "these", "they", "this", "to", "was", "will",
  13. "with", "www"
  14. ]
  15. }
  16. },
  17. "analyzer": {
  18. "rebuilt_cjk": {
  19. "tokenizer": "standard",
  20. "filter": [
  21. "cjk_width",
  22. "lowercase",
  23. "cjk_bigram",
  24. "english_stop"
  25. ]
  26. }
  27. }
  28. }
  29. }
  30. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The default stopwords are almost the same as the _english_ set, but not exactly the same.

Czech analyzer

The czech analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="czech_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "czech_stop": {
  7. "type": "stop",
  8. "stopwords": "_czech_"
  9. },
  10. "czech_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "příklad"
  14. ]
  15. },
  16. "czech_stemmer": {
  17. "type": "stemmer",
  18. "language": "czech"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_czech": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "czech_stop",
  27. "czech_keywords",
  28. "czech_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'czech_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. czech_stop: {
  8. type: 'stop',
  9. stopwords: '_czech_'
  10. },
  11. czech_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'příklad'
  15. ]
  16. },
  17. czech_stemmer: {
  18. type: 'stemmer',
  19. language: 'czech'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_czech: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'czech_stop',
  28. 'czech_keywords',
  29. 'czech_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "czech_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. czech_stop: {
  7. type: "stop",
  8. stopwords: "_czech_",
  9. },
  10. czech_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["příklad"],
  13. },
  14. czech_stemmer: {
  15. type: "stemmer",
  16. language: "czech",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_czech: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "czech_stop",
  25. "czech_keywords",
  26. "czech_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /czech_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "czech_stop": {
  7. "type": "stop",
  8. "stopwords": "_czech_"
  9. },
  10. "czech_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["příklad"]
  13. },
  14. "czech_stemmer": {
  15. "type": "stemmer",
  16. "language": "czech"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_czech": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "czech_stop",
  25. "czech_keywords",
  26. "czech_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The czech_keywords filter should be removed unless there are words which should be excluded from stemming.

Danish analyzer

The danish analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="danish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "danish_stop": {
  7. "type": "stop",
  8. "stopwords": "_danish_"
  9. },
  10. "danish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "eksempel"
  14. ]
  15. },
  16. "danish_stemmer": {
  17. "type": "stemmer",
  18. "language": "danish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_danish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "danish_stop",
  27. "danish_keywords",
  28. "danish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'danish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. danish_stop: {
  8. type: 'stop',
  9. stopwords: '_danish_'
  10. },
  11. danish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'eksempel'
  15. ]
  16. },
  17. danish_stemmer: {
  18. type: 'stemmer',
  19. language: 'danish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_danish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'danish_stop',
  28. 'danish_keywords',
  29. 'danish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "danish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. danish_stop: {
  7. type: "stop",
  8. stopwords: "_danish_",
  9. },
  10. danish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["eksempel"],
  13. },
  14. danish_stemmer: {
  15. type: "stemmer",
  16. language: "danish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_danish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "danish_stop",
  25. "danish_keywords",
  26. "danish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /danish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "danish_stop": {
  7. "type": "stop",
  8. "stopwords": "_danish_"
  9. },
  10. "danish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["eksempel"]
  13. },
  14. "danish_stemmer": {
  15. "type": "stemmer",
  16. "language": "danish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_danish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "danish_stop",
  25. "danish_keywords",
  26. "danish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The danish_keywords filter should be removed unless there are words which should be excluded from stemming.

Dutch analyzer

The dutch analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="dutch_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "dutch_stop": {
  7. "type": "stop",
  8. "stopwords": "_dutch_"
  9. },
  10. "dutch_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "voorbeeld"
  14. ]
  15. },
  16. "dutch_stemmer": {
  17. "type": "stemmer",
  18. "language": "dutch"
  19. },
  20. "dutch_override": {
  21. "type": "stemmer_override",
  22. "rules": [
  23. "fiets=>fiets",
  24. "bromfiets=>bromfiets",
  25. "ei=>eier",
  26. "kind=>kinder"
  27. ]
  28. }
  29. },
  30. "analyzer": {
  31. "rebuilt_dutch": {
  32. "tokenizer": "standard",
  33. "filter": [
  34. "lowercase",
  35. "dutch_stop",
  36. "dutch_keywords",
  37. "dutch_override",
  38. "dutch_stemmer"
  39. ]
  40. }
  41. }
  42. }
  43. },
  44. )
  45. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'dutch_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. dutch_stop: {
  8. type: 'stop',
  9. stopwords: '_dutch_'
  10. },
  11. dutch_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'voorbeeld'
  15. ]
  16. },
  17. dutch_stemmer: {
  18. type: 'stemmer',
  19. language: 'dutch'
  20. },
  21. dutch_override: {
  22. type: 'stemmer_override',
  23. rules: [
  24. 'fiets=>fiets',
  25. 'bromfiets=>bromfiets',
  26. 'ei=>eier',
  27. 'kind=>kinder'
  28. ]
  29. }
  30. },
  31. analyzer: {
  32. rebuilt_dutch: {
  33. tokenizer: 'standard',
  34. filter: [
  35. 'lowercase',
  36. 'dutch_stop',
  37. 'dutch_keywords',
  38. 'dutch_override',
  39. 'dutch_stemmer'
  40. ]
  41. }
  42. }
  43. }
  44. }
  45. }
  46. )
  47. puts response

Js

  1. const response = await client.indices.create({
  2. index: "dutch_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. dutch_stop: {
  7. type: "stop",
  8. stopwords: "_dutch_",
  9. },
  10. dutch_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["voorbeeld"],
  13. },
  14. dutch_stemmer: {
  15. type: "stemmer",
  16. language: "dutch",
  17. },
  18. dutch_override: {
  19. type: "stemmer_override",
  20. rules: [
  21. "fiets=>fiets",
  22. "bromfiets=>bromfiets",
  23. "ei=>eier",
  24. "kind=>kinder",
  25. ],
  26. },
  27. },
  28. analyzer: {
  29. rebuilt_dutch: {
  30. tokenizer: "standard",
  31. filter: [
  32. "lowercase",
  33. "dutch_stop",
  34. "dutch_keywords",
  35. "dutch_override",
  36. "dutch_stemmer",
  37. ],
  38. },
  39. },
  40. },
  41. },
  42. });
  43. console.log(response);

Console

  1. PUT /dutch_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "dutch_stop": {
  7. "type": "stop",
  8. "stopwords": "_dutch_"
  9. },
  10. "dutch_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["voorbeeld"]
  13. },
  14. "dutch_stemmer": {
  15. "type": "stemmer",
  16. "language": "dutch"
  17. },
  18. "dutch_override": {
  19. "type": "stemmer_override",
  20. "rules": [
  21. "fiets=>fiets",
  22. "bromfiets=>bromfiets",
  23. "ei=>eier",
  24. "kind=>kinder"
  25. ]
  26. }
  27. },
  28. "analyzer": {
  29. "rebuilt_dutch": {
  30. "tokenizer": "standard",
  31. "filter": [
  32. "lowercase",
  33. "dutch_stop",
  34. "dutch_keywords",
  35. "dutch_override",
  36. "dutch_stemmer"
  37. ]
  38. }
  39. }
  40. }
  41. }
  42. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The dutch_keywords filter should be removed unless there are words which should be excluded from stemming.
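The dutch_override filter above is a stemmer_override: it rewrites tokens that match the left-hand side of a rule and protects them from the stemmer that follows, which keeps, for example, fiets from being reduced any further. A quick way to observe the effect, sketched here against the index created above, is the _analyze API:

  resp = client.indices.analyze(
      index="dutch_example",
      analyzer="rebuilt_dutch",
      text="fiets kind ei",  # terms covered by the override rules
  )
  print([t["token"] for t in resp["tokens"]])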

English analyzer

The english analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="english_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. },
  10. "english_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "example"
  14. ]
  15. },
  16. "english_stemmer": {
  17. "type": "stemmer",
  18. "language": "english"
  19. },
  20. "english_possessive_stemmer": {
  21. "type": "stemmer",
  22. "language": "possessive_english"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_english": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "english_possessive_stemmer",
  30. "lowercase",
  31. "english_stop",
  32. "english_keywords",
  33. "english_stemmer"
  34. ]
  35. }
  36. }
  37. }
  38. },
  39. )
  40. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'english_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. english_stop: {
  8. type: 'stop',
  9. stopwords: '_english_'
  10. },
  11. english_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'example'
  15. ]
  16. },
  17. english_stemmer: {
  18. type: 'stemmer',
  19. language: 'english'
  20. },
  21. english_possessive_stemmer: {
  22. type: 'stemmer',
  23. language: 'possessive_english'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_english: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'english_possessive_stemmer',
  31. 'lowercase',
  32. 'english_stop',
  33. 'english_keywords',
  34. 'english_stemmer'
  35. ]
  36. }
  37. }
  38. }
  39. }
  40. }
  41. )
  42. puts response

Js

  1. const response = await client.indices.create({
  2. index: "english_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. english_stop: {
  7. type: "stop",
  8. stopwords: "_english_",
  9. },
  10. english_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["example"],
  13. },
  14. english_stemmer: {
  15. type: "stemmer",
  16. language: "english",
  17. },
  18. english_possessive_stemmer: {
  19. type: "stemmer",
  20. language: "possessive_english",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_english: {
  25. tokenizer: "standard",
  26. filter: [
  27. "english_possessive_stemmer",
  28. "lowercase",
  29. "english_stop",
  30. "english_keywords",
  31. "english_stemmer",
  32. ],
  33. },
  34. },
  35. },
  36. },
  37. });
  38. console.log(response);

Console

  1. PUT /english_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "english_stop": {
  7. "type": "stop",
  8. "stopwords": "_english_"
  9. },
  10. "english_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["example"]
  13. },
  14. "english_stemmer": {
  15. "type": "stemmer",
  16. "language": "english"
  17. },
  18. "english_possessive_stemmer": {
  19. "type": "stemmer",
  20. "language": "possessive_english"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_english": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "english_possessive_stemmer",
  28. "lowercase",
  29. "english_stop",
  30. "english_keywords",
  31. "english_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The english_keywords filter should be removed unless there are words which should be excluded from stemming.

Estonian analyzer

The estonian analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="estonian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "estonian_stop": {
  7. "type": "stop",
  8. "stopwords": "_estonian_"
  9. },
  10. "estonian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "näide"
  14. ]
  15. },
  16. "estonian_stemmer": {
  17. "type": "stemmer",
  18. "language": "estonian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_estonian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "estonian_stop",
  27. "estonian_keywords",
  28. "estonian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'estonian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. estonian_stop: {
  8. type: 'stop',
  9. stopwords: '_estonian_'
  10. },
  11. estonian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'näide'
  15. ]
  16. },
  17. estonian_stemmer: {
  18. type: 'stemmer',
  19. language: 'estonian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_estonian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'estonian_stop',
  28. 'estonian_keywords',
  29. 'estonian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "estonian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. estonian_stop: {
  7. type: "stop",
  8. stopwords: "_estonian_",
  9. },
  10. estonian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["näide"],
  13. },
  14. estonian_stemmer: {
  15. type: "stemmer",
  16. language: "estonian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_estonian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "estonian_stop",
  25. "estonian_keywords",
  26. "estonian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /estonian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "estonian_stop": {
  7. "type": "stop",
  8. "stopwords": "_estonian_"
  9. },
  10. "estonian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["näide"]
  13. },
  14. "estonian_stemmer": {
  15. "type": "stemmer",
  16. "language": "estonian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_estonian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "estonian_stop",
  25. "estonian_keywords",
  26. "estonian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The estonian_keywords filter should be removed unless there are words which should be excluded from stemming.

Finnish analyzer

The finnish analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="finnish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "finnish_stop": {
  7. "type": "stop",
  8. "stopwords": "_finnish_"
  9. },
  10. "finnish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "esimerkki"
  14. ]
  15. },
  16. "finnish_stemmer": {
  17. "type": "stemmer",
  18. "language": "finnish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_finnish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "finnish_stop",
  27. "finnish_keywords",
  28. "finnish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'finnish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. finnish_stop: {
  8. type: 'stop',
  9. stopwords: '_finnish_'
  10. },
  11. finnish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'esimerkki'
  15. ]
  16. },
  17. finnish_stemmer: {
  18. type: 'stemmer',
  19. language: 'finnish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_finnish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'finnish_stop',
  28. 'finnish_keywords',
  29. 'finnish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "finnish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. finnish_stop: {
  7. type: "stop",
  8. stopwords: "_finnish_",
  9. },
  10. finnish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["esimerkki"],
  13. },
  14. finnish_stemmer: {
  15. type: "stemmer",
  16. language: "finnish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_finnish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "finnish_stop",
  25. "finnish_keywords",
  26. "finnish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /finnish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "finnish_stop": {
  7. "type": "stop",
  8. "stopwords": "_finnish_"
  9. },
  10. "finnish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["esimerkki"]
  13. },
  14. "finnish_stemmer": {
  15. "type": "stemmer",
  16. "language": "finnish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_finnish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "finnish_stop",
  25. "finnish_keywords",
  26. "finnish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The finnish_keywords filter should be removed unless there are words which should be excluded from stemming.

French analyzer

The french analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="french_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "french_elision": {
  7. "type": "elision",
  8. "articles_case": True,
  9. "articles": [
  10. "l",
  11. "m",
  12. "t",
  13. "qu",
  14. "n",
  15. "s",
  16. "j",
  17. "d",
  18. "c",
  19. "jusqu",
  20. "quoiqu",
  21. "lorsqu",
  22. "puisqu"
  23. ]
  24. },
  25. "french_stop": {
  26. "type": "stop",
  27. "stopwords": "_french_"
  28. },
  29. "french_keywords": {
  30. "type": "keyword_marker",
  31. "keywords": [
  32. "Example"
  33. ]
  34. },
  35. "french_stemmer": {
  36. "type": "stemmer",
  37. "language": "light_french"
  38. }
  39. },
  40. "analyzer": {
  41. "rebuilt_french": {
  42. "tokenizer": "standard",
  43. "filter": [
  44. "french_elision",
  45. "lowercase",
  46. "french_stop",
  47. "french_keywords",
  48. "french_stemmer"
  49. ]
  50. }
  51. }
  52. }
  53. },
  54. )
  55. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'french_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. french_elision: {
  8. type: 'elision',
  9. articles_case: true,
  10. articles: [
  11. 'l',
  12. 'm',
  13. 't',
  14. 'qu',
  15. 'n',
  16. 's',
  17. 'j',
  18. 'd',
  19. 'c',
  20. 'jusqu',
  21. 'quoiqu',
  22. 'lorsqu',
  23. 'puisqu'
  24. ]
  25. },
  26. french_stop: {
  27. type: 'stop',
  28. stopwords: '_french_'
  29. },
  30. french_keywords: {
  31. type: 'keyword_marker',
  32. keywords: [
  33. 'Example'
  34. ]
  35. },
  36. french_stemmer: {
  37. type: 'stemmer',
  38. language: 'light_french'
  39. }
  40. },
  41. analyzer: {
  42. rebuilt_french: {
  43. tokenizer: 'standard',
  44. filter: [
  45. 'french_elision',
  46. 'lowercase',
  47. 'french_stop',
  48. 'french_keywords',
  49. 'french_stemmer'
  50. ]
  51. }
  52. }
  53. }
  54. }
  55. }
  56. )
  57. puts response

Js

  1. const response = await client.indices.create({
  2. index: "french_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. french_elision: {
  7. type: "elision",
  8. articles_case: true,
  9. articles: [
  10. "l",
  11. "m",
  12. "t",
  13. "qu",
  14. "n",
  15. "s",
  16. "j",
  17. "d",
  18. "c",
  19. "jusqu",
  20. "quoiqu",
  21. "lorsqu",
  22. "puisqu",
  23. ],
  24. },
  25. french_stop: {
  26. type: "stop",
  27. stopwords: "_french_",
  28. },
  29. french_keywords: {
  30. type: "keyword_marker",
  31. keywords: ["Example"],
  32. },
  33. french_stemmer: {
  34. type: "stemmer",
  35. language: "light_french",
  36. },
  37. },
  38. analyzer: {
  39. rebuilt_french: {
  40. tokenizer: "standard",
  41. filter: [
  42. "french_elision",
  43. "lowercase",
  44. "french_stop",
  45. "french_keywords",
  46. "french_stemmer",
  47. ],
  48. },
  49. },
  50. },
  51. },
  52. });
  53. console.log(response);

Console

  1. PUT /french_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "french_elision": {
  7. "type": "elision",
  8. "articles_case": true,
  9. "articles": [
  10. "l", "m", "t", "qu", "n", "s",
  11. "j", "d", "c", "jusqu", "quoiqu",
  12. "lorsqu", "puisqu"
  13. ]
  14. },
  15. "french_stop": {
  16. "type": "stop",
  17. "stopwords": "_french_"
  18. },
  19. "french_keywords": {
  20. "type": "keyword_marker",
  21. "keywords": ["Example"]
  22. },
  23. "french_stemmer": {
  24. "type": "stemmer",
  25. "language": "light_french"
  26. }
  27. },
  28. "analyzer": {
  29. "rebuilt_french": {
  30. "tokenizer": "standard",
  31. "filter": [
  32. "french_elision",
  33. "lowercase",
  34. "french_stop",
  35. "french_keywords",
  36. "french_stemmer"
  37. ]
  38. }
  39. }
  40. }
  41. }
  42. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The french_keywords filter should be removed unless there are words which should be excluded from stemming.

Galician analyzer

The galician analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="galician_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "galician_stop": {
  7. "type": "stop",
  8. "stopwords": "_galician_"
  9. },
  10. "galician_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "galician_stemmer": {
  17. "type": "stemmer",
  18. "language": "galician"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_galician": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "galician_stop",
  27. "galician_keywords",
  28. "galician_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'galician_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. galician_stop: {
  8. type: 'stop',
  9. stopwords: '_galician_'
  10. },
  11. galician_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. galician_stemmer: {
  18. type: 'stemmer',
  19. language: 'galician'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_galician: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'galician_stop',
  28. 'galician_keywords',
  29. 'galician_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "galician_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. galician_stop: {
  7. type: "stop",
  8. stopwords: "_galician_",
  9. },
  10. galician_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. galician_stemmer: {
  15. type: "stemmer",
  16. language: "galician",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_galician: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "galician_stop",
  25. "galician_keywords",
  26. "galician_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

Console

  1. PUT /galician_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "galician_stop": {
  7. "type": "stop",
  8. "stopwords": "_galician_"
  9. },
  10. "galician_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "galician_stemmer": {
  15. "type": "stemmer",
  16. "language": "galician"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_galician": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "galician_stop",
  25. "galician_keywords",
  26. "galician_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The galician_keywords filter should be removed unless there are words which should be excluded from stemming.

German analyzer

The german analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="german_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "german_stop": {
  7. "type": "stop",
  8. "stopwords": "_german_"
  9. },
  10. "german_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "Beispiel"
  14. ]
  15. },
  16. "german_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_german"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_german": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "german_stop",
  27. "german_keywords",
  28. "german_normalization",
  29. "german_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. },
  35. )
  36. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'german_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. german_stop: {
  8. type: 'stop',
  9. stopwords: '_german_'
  10. },
  11. german_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'Beispiel'
  15. ]
  16. },
  17. german_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_german'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_german: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'german_stop',
  28. 'german_keywords',
  29. 'german_normalization',
  30. 'german_stemmer'
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }
  37. )
  38. puts response

Js

  1. const response = await client.indices.create({
  2. index: "german_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. german_stop: {
  7. type: "stop",
  8. stopwords: "_german_",
  9. },
  10. german_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["Beispiel"],
  13. },
  14. german_stemmer: {
  15. type: "stemmer",
  16. language: "light_german",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_german: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "german_stop",
  25. "german_keywords",
  26. "german_normalization",
  27. "german_stemmer",
  28. ],
  29. },
  30. },
  31. },
  32. },
  33. });
  34. console.log(response);

Console

  1. PUT /german_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "german_stop": {
  7. "type": "stop",
  8. "stopwords": "_german_"
  9. },
  10. "german_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["Beispiel"]
  13. },
  14. "german_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_german"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_german": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "german_stop",
  25. "german_keywords",
  26. "german_normalization",
  27. "german_stemmer"
  28. ]
  29. }
  30. }
  31. }
  32. }
  33. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The german_keywords filter should be removed unless there are words which should be excluded from stemming.

Greek analyzer

The greek analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="greek_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "greek_stop": {
  7. "type": "stop",
  8. "stopwords": "_greek_"
  9. },
  10. "greek_lowercase": {
  11. "type": "lowercase",
  12. "language": "greek"
  13. },
  14. "greek_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": [
  17. "παράδειγμα"
  18. ]
  19. },
  20. "greek_stemmer": {
  21. "type": "stemmer",
  22. "language": "greek"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_greek": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "greek_lowercase",
  30. "greek_stop",
  31. "greek_keywords",
  32. "greek_stemmer"
  33. ]
  34. }
  35. }
  36. }
  37. },
  38. )
  39. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'greek_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. greek_stop: {
  8. type: 'stop',
  9. stopwords: '_greek_'
  10. },
  11. greek_lowercase: {
  12. type: 'lowercase',
  13. language: 'greek'
  14. },
  15. greek_keywords: {
  16. type: 'keyword_marker',
  17. keywords: [
  18. 'παράδειγμα'
  19. ]
  20. },
  21. greek_stemmer: {
  22. type: 'stemmer',
  23. language: 'greek'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_greek: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'greek_lowercase',
  31. 'greek_stop',
  32. 'greek_keywords',
  33. 'greek_stemmer'
  34. ]
  35. }
  36. }
  37. }
  38. }
  39. }
  40. )
  41. puts response

Js

  1. const response = await client.indices.create({
  2. index: "greek_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. greek_stop: {
  7. type: "stop",
  8. stopwords: "_greek_",
  9. },
  10. greek_lowercase: {
  11. type: "lowercase",
  12. language: "greek",
  13. },
  14. greek_keywords: {
  15. type: "keyword_marker",
  16. keywords: ["παράδειγμα"],
  17. },
  18. greek_stemmer: {
  19. type: "stemmer",
  20. language: "greek",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_greek: {
  25. tokenizer: "standard",
  26. filter: [
  27. "greek_lowercase",
  28. "greek_stop",
  29. "greek_keywords",
  30. "greek_stemmer",
  31. ],
  32. },
  33. },
  34. },
  35. },
  36. });
  37. console.log(response);

Console

  1. PUT /greek_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "greek_stop": {
  7. "type": "stop",
  8. "stopwords": "_greek_"
  9. },
  10. "greek_lowercase": {
  11. "type": "lowercase",
  12. "language": "greek"
  13. },
  14. "greek_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": ["παράδειγμα"]
  17. },
  18. "greek_stemmer": {
  19. "type": "stemmer",
  20. "language": "greek"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_greek": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "greek_lowercase",
  28. "greek_stop",
  29. "greek_keywords",
  30. "greek_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }
The default stopwords can be overridden with the stopwords or stopwords_path parameters.
The greek_keywords filter should be removed unless there are words which should be excluded from stemming.

Hindi analyzer

The hindi analyzer can be reimplemented as a custom analyzer as follows:

Python

  1. resp = client.indices.create(
  2. index="hindi_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "hindi_stop": {
  7. "type": "stop",
  8. "stopwords": "_hindi_"
  9. },
  10. "hindi_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "उदाहरण"
  14. ]
  15. },
  16. "hindi_stemmer": {
  17. "type": "stemmer",
  18. "language": "hindi"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_hindi": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "decimal_digit",
  27. "hindi_keywords",
  28. "indic_normalization",
  29. "hindi_normalization",
  30. "hindi_stop",
  31. "hindi_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. },
  37. )
  38. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'hindi_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. hindi_stop: {
  8. type: 'stop',
  9. stopwords: '_hindi_'
  10. },
  11. hindi_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'उदाहरण'
  15. ]
  16. },
  17. hindi_stemmer: {
  18. type: 'stemmer',
  19. language: 'hindi'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_hindi: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'decimal_digit',
  28. 'hindi_keywords',
  29. 'indic_normalization',
  30. 'hindi_normalization',
  31. 'hindi_stop',
  32. 'hindi_stemmer'
  33. ]
  34. }
  35. }
  36. }
  37. }
  38. }
  39. )
  40. puts response

Js

  1. const response = await client.indices.create({
  2. index: "hindi_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. hindi_stop: {
  7. type: "stop",
  8. stopwords: "_hindi_",
  9. },
  10. hindi_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["उदाहरण"],
  13. },
  14. hindi_stemmer: {
  15. type: "stemmer",
  16. language: "hindi",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_hindi: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "decimal_digit",
  25. "hindi_keywords",
  26. "indic_normalization",
  27. "hindi_normalization",
  28. "hindi_stop",
  29. "hindi_stemmer",
  30. ],
  31. },
  32. },
  33. },
  34. },
  35. });
  36. console.log(response);

コンソール

  1. PUT /hindi_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "hindi_stop": {
  7. "type": "stop",
  8. "stopwords": "_hindi_"
  9. },
  10. "hindi_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["उदाहरण"]
  13. },
  14. "hindi_stemmer": {
  15. "type": "stemmer",
  16. "language": "hindi"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_hindi": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "decimal_digit",
  25. "hindi_keywords",
  26. "indic_normalization",
  27. "hindi_normalization",
  28. "hindi_stop",
  29. "hindi_stemmer"
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(hindi_keywords)を削除してください。
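
再実装した rebuilt_hindi の出力を確認したい場合は、_analyze API で組み込みの hindi アナライザーと比較できます。以下は、上記の hindi_example インデックスが作成済みであることを前提とした確認用のスケッチです(テキストは説明用の例で、hindi_keywords に登録した語についてはステミングの有無により出力が異なる可能性があります)。

Python

# 組み込みアナライザーと再実装版のトークンを並べて比較するスケッチ
text = "उदाहरण"  # 上記の例で keyword_marker に登録した語(説明用)

builtin = client.indices.analyze(index="hindi_example", analyzer="hindi", text=text)
rebuilt = client.indices.analyze(index="hindi_example", analyzer="rebuilt_hindi", text=text)

print([t["token"] for t in builtin["tokens"]])
print([t["token"] for t in rebuilt["tokens"]])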

ハンガリー語アナライザー

hungarian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="hungarian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "hungarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_hungarian_"
  9. },
  10. "hungarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "példa"
  14. ]
  15. },
  16. "hungarian_stemmer": {
  17. "type": "stemmer",
  18. "language": "hungarian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_hungarian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "hungarian_stop",
  27. "hungarian_keywords",
  28. "hungarian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'hungarian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. hungarian_stop: {
  8. type: 'stop',
  9. stopwords: '_hungarian_'
  10. },
  11. hungarian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'példa'
  15. ]
  16. },
  17. hungarian_stemmer: {
  18. type: 'stemmer',
  19. language: 'hungarian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_hungarian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'hungarian_stop',
  28. 'hungarian_keywords',
  29. 'hungarian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "hungarian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. hungarian_stop: {
  7. type: "stop",
  8. stopwords: "_hungarian_",
  9. },
  10. hungarian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["példa"],
  13. },
  14. hungarian_stemmer: {
  15. type: "stemmer",
  16. language: "hungarian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_hungarian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "hungarian_stop",
  25. "hungarian_keywords",
  26. "hungarian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /hungarian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "hungarian_stop": {
  7. "type": "stop",
  8. "stopwords": "_hungarian_"
  9. },
  10. "hungarian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["példa"]
  13. },
  14. "hungarian_stemmer": {
  15. "type": "stemmer",
  16. "language": "hungarian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_hungarian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "hungarian_stop",
  25. "hungarian_keywords",
  26. "hungarian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(hungarian_keywords)を削除してください(下記の例も参照)。
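
keyword_marker の効果は _analyze API で確認できます。hungarian_keywords に登録した「példa」はキーワードとして扱われるため、hungarian_stemmer では変更されません。以下は上記の hungarian_example インデックスが作成済みであることを前提としたスケッチです。

Python

# keyword_marker に登録した語がステミングされないことを確認するスケッチ
resp = client.indices.analyze(
    index="hungarian_example",
    analyzer="rebuilt_hungarian",
    text="példa",  # hungarian_keywords に登録済みのため、そのまま "példa" として返る想定
)
print([t["token"] for t in resp["tokens"]])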

インドネシア語アナライザー

indonesian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="indonesian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "indonesian_stop": {
  7. "type": "stop",
  8. "stopwords": "_indonesian_"
  9. },
  10. "indonesian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "contoh"
  14. ]
  15. },
  16. "indonesian_stemmer": {
  17. "type": "stemmer",
  18. "language": "indonesian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_indonesian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "indonesian_stop",
  27. "indonesian_keywords",
  28. "indonesian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'indonesian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. indonesian_stop: {
  8. type: 'stop',
  9. stopwords: '_indonesian_'
  10. },
  11. indonesian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'contoh'
  15. ]
  16. },
  17. indonesian_stemmer: {
  18. type: 'stemmer',
  19. language: 'indonesian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_indonesian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'indonesian_stop',
  28. 'indonesian_keywords',
  29. 'indonesian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "indonesian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. indonesian_stop: {
  7. type: "stop",
  8. stopwords: "_indonesian_",
  9. },
  10. indonesian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["contoh"],
  13. },
  14. indonesian_stemmer: {
  15. type: "stemmer",
  16. language: "indonesian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_indonesian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "indonesian_stop",
  25. "indonesian_keywords",
  26. "indonesian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /indonesian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "indonesian_stop": {
  7. "type": "stop",
  8. "stopwords": "_indonesian_"
  9. },
  10. "indonesian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["contoh"]
  13. },
  14. "indonesian_stemmer": {
  15. "type": "stemmer",
  16. "language": "indonesian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_indonesian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "indonesian_stop",
  25. "indonesian_keywords",
  26. "indonesian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(indonesian_keywords)を削除してください。

アイルランド語アナライザー

irish アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="irish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "irish_hyphenation": {
  7. "type": "stop",
  8. "stopwords": [
  9. "h",
  10. "n",
  11. "t"
  12. ],
  13. "ignore_case": True
  14. },
  15. "irish_elision": {
  16. "type": "elision",
  17. "articles": [
  18. "d",
  19. "m",
  20. "b"
  21. ],
  22. "articles_case": True
  23. },
  24. "irish_stop": {
  25. "type": "stop",
  26. "stopwords": "_irish_"
  27. },
  28. "irish_lowercase": {
  29. "type": "lowercase",
  30. "language": "irish"
  31. },
  32. "irish_keywords": {
  33. "type": "keyword_marker",
  34. "keywords": [
  35. "sampla"
  36. ]
  37. },
  38. "irish_stemmer": {
  39. "type": "stemmer",
  40. "language": "irish"
  41. }
  42. },
  43. "analyzer": {
  44. "rebuilt_irish": {
  45. "tokenizer": "standard",
  46. "filter": [
  47. "irish_hyphenation",
  48. "irish_elision",
  49. "irish_lowercase",
  50. "irish_stop",
  51. "irish_keywords",
  52. "irish_stemmer"
  53. ]
  54. }
  55. }
  56. }
  57. },
  58. )
  59. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'irish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. irish_hyphenation: {
  8. type: 'stop',
  9. stopwords: [
  10. 'h',
  11. 'n',
  12. 't'
  13. ],
  14. ignore_case: true
  15. },
  16. irish_elision: {
  17. type: 'elision',
  18. articles: [
  19. 'd',
  20. 'm',
  21. 'b'
  22. ],
  23. articles_case: true
  24. },
  25. irish_stop: {
  26. type: 'stop',
  27. stopwords: '_irish_'
  28. },
  29. irish_lowercase: {
  30. type: 'lowercase',
  31. language: 'irish'
  32. },
  33. irish_keywords: {
  34. type: 'keyword_marker',
  35. keywords: [
  36. 'sampla'
  37. ]
  38. },
  39. irish_stemmer: {
  40. type: 'stemmer',
  41. language: 'irish'
  42. }
  43. },
  44. analyzer: {
  45. rebuilt_irish: {
  46. tokenizer: 'standard',
  47. filter: [
  48. 'irish_hyphenation',
  49. 'irish_elision',
  50. 'irish_lowercase',
  51. 'irish_stop',
  52. 'irish_keywords',
  53. 'irish_stemmer'
  54. ]
  55. }
  56. }
  57. }
  58. }
  59. }
  60. )
  61. puts response

Js

  1. const response = await client.indices.create({
  2. index: "irish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. irish_hyphenation: {
  7. type: "stop",
  8. stopwords: ["h", "n", "t"],
  9. ignore_case: true,
  10. },
  11. irish_elision: {
  12. type: "elision",
  13. articles: ["d", "m", "b"],
  14. articles_case: true,
  15. },
  16. irish_stop: {
  17. type: "stop",
  18. stopwords: "_irish_",
  19. },
  20. irish_lowercase: {
  21. type: "lowercase",
  22. language: "irish",
  23. },
  24. irish_keywords: {
  25. type: "keyword_marker",
  26. keywords: ["sampla"],
  27. },
  28. irish_stemmer: {
  29. type: "stemmer",
  30. language: "irish",
  31. },
  32. },
  33. analyzer: {
  34. rebuilt_irish: {
  35. tokenizer: "standard",
  36. filter: [
  37. "irish_hyphenation",
  38. "irish_elision",
  39. "irish_lowercase",
  40. "irish_stop",
  41. "irish_keywords",
  42. "irish_stemmer",
  43. ],
  44. },
  45. },
  46. },
  47. },
  48. });
  49. console.log(response);

コンソール

  1. PUT /irish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "irish_hyphenation": {
  7. "type": "stop",
  8. "stopwords": [ "h", "n", "t" ],
  9. "ignore_case": true
  10. },
  11. "irish_elision": {
  12. "type": "elision",
  13. "articles": [ "d", "m", "b" ],
  14. "articles_case": true
  15. },
  16. "irish_stop": {
  17. "type": "stop",
  18. "stopwords": "_irish_"
  19. },
  20. "irish_lowercase": {
  21. "type": "lowercase",
  22. "language": "irish"
  23. },
  24. "irish_keywords": {
  25. "type": "keyword_marker",
  26. "keywords": ["sampla"]
  27. },
  28. "irish_stemmer": {
  29. "type": "stemmer",
  30. "language": "irish"
  31. }
  32. },
  33. "analyzer": {
  34. "rebuilt_irish": {
  35. "tokenizer": "standard",
  36. "filter": [
  37. "irish_hyphenation",
  38. "irish_elision",
  39. "irish_lowercase",
  40. "irish_stop",
  41. "irish_keywords",
  42. "irish_stemmer"
  43. ]
  44. }
  45. }
  46. }
  47. }
  48. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(irish_keywords)を削除してください。
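
irish_hyphenation と irish_elision の働きは、_analyze API で実際のトークンを確認するのが分かりやすいでしょう。以下は上記の irish_example インデックスが作成済みであることを前提としたスケッチで、入力テキストと期待される結果はあくまで説明用の想定です。

Python

# standard トークナイザーがハイフンで分割した後、
# irish_hyphenation が前置される単独の h / n / t トークンを除去することを確認するスケッチ
resp = client.indices.analyze(
    index="irish_example",
    analyzer="rebuilt_irish",
    text="t-athair",  # 説明用の例。おおよそ ["athair"](とそのステミング結果)になる想定
)
print([t["token"] for t in resp["tokens"]])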

イタリア語アナライザー

italian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="italian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "italian_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "c",
  10. "l",
  11. "all",
  12. "dall",
  13. "dell",
  14. "nell",
  15. "sull",
  16. "coll",
  17. "pell",
  18. "gl",
  19. "agl",
  20. "dagl",
  21. "degl",
  22. "negl",
  23. "sugl",
  24. "un",
  25. "m",
  26. "t",
  27. "s",
  28. "v",
  29. "d"
  30. ],
  31. "articles_case": True
  32. },
  33. "italian_stop": {
  34. "type": "stop",
  35. "stopwords": "_italian_"
  36. },
  37. "italian_keywords": {
  38. "type": "keyword_marker",
  39. "keywords": [
  40. "esempio"
  41. ]
  42. },
  43. "italian_stemmer": {
  44. "type": "stemmer",
  45. "language": "light_italian"
  46. }
  47. },
  48. "analyzer": {
  49. "rebuilt_italian": {
  50. "tokenizer": "standard",
  51. "filter": [
  52. "italian_elision",
  53. "lowercase",
  54. "italian_stop",
  55. "italian_keywords",
  56. "italian_stemmer"
  57. ]
  58. }
  59. }
  60. }
  61. },
  62. )
  63. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'italian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. italian_elision: {
  8. type: 'elision',
  9. articles: [
  10. 'c',
  11. 'l',
  12. 'all',
  13. 'dall',
  14. 'dell',
  15. 'nell',
  16. 'sull',
  17. 'coll',
  18. 'pell',
  19. 'gl',
  20. 'agl',
  21. 'dagl',
  22. 'degl',
  23. 'negl',
  24. 'sugl',
  25. 'un',
  26. 'm',
  27. 't',
  28. 's',
  29. 'v',
  30. 'd'
  31. ],
  32. articles_case: true
  33. },
  34. italian_stop: {
  35. type: 'stop',
  36. stopwords: '_italian_'
  37. },
  38. italian_keywords: {
  39. type: 'keyword_marker',
  40. keywords: [
  41. 'esempio'
  42. ]
  43. },
  44. italian_stemmer: {
  45. type: 'stemmer',
  46. language: 'light_italian'
  47. }
  48. },
  49. analyzer: {
  50. rebuilt_italian: {
  51. tokenizer: 'standard',
  52. filter: [
  53. 'italian_elision',
  54. 'lowercase',
  55. 'italian_stop',
  56. 'italian_keywords',
  57. 'italian_stemmer'
  58. ]
  59. }
  60. }
  61. }
  62. }
  63. }
  64. )
  65. puts response

Js

  1. const response = await client.indices.create({
  2. index: "italian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. italian_elision: {
  7. type: "elision",
  8. articles: [
  9. "c",
  10. "l",
  11. "all",
  12. "dall",
  13. "dell",
  14. "nell",
  15. "sull",
  16. "coll",
  17. "pell",
  18. "gl",
  19. "agl",
  20. "dagl",
  21. "degl",
  22. "negl",
  23. "sugl",
  24. "un",
  25. "m",
  26. "t",
  27. "s",
  28. "v",
  29. "d",
  30. ],
  31. articles_case: true,
  32. },
  33. italian_stop: {
  34. type: "stop",
  35. stopwords: "_italian_",
  36. },
  37. italian_keywords: {
  38. type: "keyword_marker",
  39. keywords: ["esempio"],
  40. },
  41. italian_stemmer: {
  42. type: "stemmer",
  43. language: "light_italian",
  44. },
  45. },
  46. analyzer: {
  47. rebuilt_italian: {
  48. tokenizer: "standard",
  49. filter: [
  50. "italian_elision",
  51. "lowercase",
  52. "italian_stop",
  53. "italian_keywords",
  54. "italian_stemmer",
  55. ],
  56. },
  57. },
  58. },
  59. },
  60. });
  61. console.log(response);

コンソール

  1. PUT /italian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "italian_elision": {
  7. "type": "elision",
  8. "articles": [
  9. "c", "l", "all", "dall", "dell",
  10. "nell", "sull", "coll", "pell",
  11. "gl", "agl", "dagl", "degl", "negl",
  12. "sugl", "un", "m", "t", "s", "v", "d"
  13. ],
  14. "articles_case": true
  15. },
  16. "italian_stop": {
  17. "type": "stop",
  18. "stopwords": "_italian_"
  19. },
  20. "italian_keywords": {
  21. "type": "keyword_marker",
  22. "keywords": ["esempio"]
  23. },
  24. "italian_stemmer": {
  25. "type": "stemmer",
  26. "language": "light_italian"
  27. }
  28. },
  29. "analyzer": {
  30. "rebuilt_italian": {
  31. "tokenizer": "standard",
  32. "filter": [
  33. "italian_elision",
  34. "lowercase",
  35. "italian_stop",
  36. "italian_keywords",
  37. "italian_stemmer"
  38. ]
  39. }
  40. }
  41. }
  42. }
  43. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(italian_keywords)を削除してください。
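
italian_elision フィルターは、articles に列挙した冠詞などがアポストロフィ付きで語頭に付いた場合にそれを取り除きます。以下は上記の italian_example インデックスが作成済みであることを前提とした確認用のスケッチです(入力と期待されるトークンは説明用の想定です)。

Python

# "l'esempio" の "l'" が italian_elision で取り除かれ、
# 残った "esempio" は italian_keywords によりステミングされないことを確認するスケッチ
resp = client.indices.analyze(
    index="italian_example",
    analyzer="rebuilt_italian",
    text="l'esempio",  # おおよそ ["esempio"] になる想定(説明用)
)
print([t["token"] for t in resp["tokens"]])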

ラトビア語アナライザー

latvian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="latvian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "latvian_stop": {
  7. "type": "stop",
  8. "stopwords": "_latvian_"
  9. },
  10. "latvian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "piemērs"
  14. ]
  15. },
  16. "latvian_stemmer": {
  17. "type": "stemmer",
  18. "language": "latvian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_latvian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "latvian_stop",
  27. "latvian_keywords",
  28. "latvian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'latvian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. latvian_stop: {
  8. type: 'stop',
  9. stopwords: '_latvian_'
  10. },
  11. latvian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'piemērs'
  15. ]
  16. },
  17. latvian_stemmer: {
  18. type: 'stemmer',
  19. language: 'latvian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_latvian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'latvian_stop',
  28. 'latvian_keywords',
  29. 'latvian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "latvian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. latvian_stop: {
  7. type: "stop",
  8. stopwords: "_latvian_",
  9. },
  10. latvian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["piemērs"],
  13. },
  14. latvian_stemmer: {
  15. type: "stemmer",
  16. language: "latvian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_latvian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "latvian_stop",
  25. "latvian_keywords",
  26. "latvian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /latvian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "latvian_stop": {
  7. "type": "stop",
  8. "stopwords": "_latvian_"
  9. },
  10. "latvian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["piemērs"]
  13. },
  14. "latvian_stemmer": {
  15. "type": "stemmer",
  16. "language": "latvian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_latvian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "latvian_stop",
  25. "latvian_keywords",
  26. "latvian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(latvian_keywords)を削除してください。

リトアニア語アナライザー

lithuanian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="lithuanian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "lithuanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_lithuanian_"
  9. },
  10. "lithuanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "pavyzdys"
  14. ]
  15. },
  16. "lithuanian_stemmer": {
  17. "type": "stemmer",
  18. "language": "lithuanian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_lithuanian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "lithuanian_stop",
  27. "lithuanian_keywords",
  28. "lithuanian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'lithuanian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. lithuanian_stop: {
  8. type: 'stop',
  9. stopwords: '_lithuanian_'
  10. },
  11. lithuanian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'pavyzdys'
  15. ]
  16. },
  17. lithuanian_stemmer: {
  18. type: 'stemmer',
  19. language: 'lithuanian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_lithuanian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'lithuanian_stop',
  28. 'lithuanian_keywords',
  29. 'lithuanian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "lithuanian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. lithuanian_stop: {
  7. type: "stop",
  8. stopwords: "_lithuanian_",
  9. },
  10. lithuanian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["pavyzdys"],
  13. },
  14. lithuanian_stemmer: {
  15. type: "stemmer",
  16. language: "lithuanian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_lithuanian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "lithuanian_stop",
  25. "lithuanian_keywords",
  26. "lithuanian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /lithuanian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "lithuanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_lithuanian_"
  9. },
  10. "lithuanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["pavyzdys"]
  13. },
  14. "lithuanian_stemmer": {
  15. "type": "stemmer",
  16. "language": "lithuanian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_lithuanian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "lithuanian_stop",
  25. "lithuanian_keywords",
  26. "lithuanian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(lithuanian_keywords)を削除してください。

ノルウェー語アナライザー

norwegian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="norwegian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "norwegian_stop": {
  7. "type": "stop",
  8. "stopwords": "_norwegian_"
  9. },
  10. "norwegian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "eksempel"
  14. ]
  15. },
  16. "norwegian_stemmer": {
  17. "type": "stemmer",
  18. "language": "norwegian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_norwegian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "norwegian_stop",
  27. "norwegian_keywords",
  28. "norwegian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'norwegian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. norwegian_stop: {
  8. type: 'stop',
  9. stopwords: '_norwegian_'
  10. },
  11. norwegian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'eksempel'
  15. ]
  16. },
  17. norwegian_stemmer: {
  18. type: 'stemmer',
  19. language: 'norwegian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_norwegian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'norwegian_stop',
  28. 'norwegian_keywords',
  29. 'norwegian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "norwegian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. norwegian_stop: {
  7. type: "stop",
  8. stopwords: "_norwegian_",
  9. },
  10. norwegian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["eksempel"],
  13. },
  14. norwegian_stemmer: {
  15. type: "stemmer",
  16. language: "norwegian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_norwegian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "norwegian_stop",
  25. "norwegian_keywords",
  26. "norwegian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /norwegian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "norwegian_stop": {
  7. "type": "stop",
  8. "stopwords": "_norwegian_"
  9. },
  10. "norwegian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["eksempel"]
  13. },
  14. "norwegian_stemmer": {
  15. "type": "stemmer",
  16. "language": "norwegian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_norwegian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "norwegian_stop",
  25. "norwegian_keywords",
  26. "norwegian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(norwegian_keywords)を削除してください。

ペルシャ語アナライザー

persian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="persian_example",
  3. settings={
  4. "analysis": {
  5. "char_filter": {
  6. "zero_width_spaces": {
  7. "type": "mapping",
  8. "mappings": [
  9. "\\u200C=>\\u0020"
  10. ]
  11. }
  12. },
  13. "filter": {
  14. "persian_stop": {
  15. "type": "stop",
  16. "stopwords": "_persian_"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_persian": {
  21. "tokenizer": "standard",
  22. "char_filter": [
  23. "zero_width_spaces"
  24. ],
  25. "filter": [
  26. "lowercase",
  27. "decimal_digit",
  28. "arabic_normalization",
  29. "persian_normalization",
  30. "persian_stop"
  31. ]
  32. }
  33. }
  34. }
  35. },
  36. )
  37. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'persian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. char_filter: {
  7. zero_width_spaces: {
  8. type: 'mapping',
  9. mappings: [
  10. '\\u200C=>\\u0020'
  11. ]
  12. }
  13. },
  14. filter: {
  15. persian_stop: {
  16. type: 'stop',
  17. stopwords: '_persian_'
  18. }
  19. },
  20. analyzer: {
  21. rebuilt_persian: {
  22. tokenizer: 'standard',
  23. char_filter: [
  24. 'zero_width_spaces'
  25. ],
  26. filter: [
  27. 'lowercase',
  28. 'decimal_digit',
  29. 'arabic_normalization',
  30. 'persian_normalization',
  31. 'persian_stop'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response

Js

  1. const response = await client.indices.create({
  2. index: "persian_example",
  3. settings: {
  4. analysis: {
  5. char_filter: {
  6. zero_width_spaces: {
  7. type: "mapping",
  8. mappings: ["\\u200C=>\\u0020"],
  9. },
  10. },
  11. filter: {
  12. persian_stop: {
  13. type: "stop",
  14. stopwords: "_persian_",
  15. },
  16. },
  17. analyzer: {
  18. rebuilt_persian: {
  19. tokenizer: "standard",
  20. char_filter: ["zero_width_spaces"],
  21. filter: [
  22. "lowercase",
  23. "decimal_digit",
  24. "arabic_normalization",
  25. "persian_normalization",
  26. "persian_stop",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /persian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "char_filter": {
  6. "zero_width_spaces": {
  7. "type": "mapping",
  8. "mappings": [ "\\u200C=>\\u0020"]
  9. }
  10. },
  11. "filter": {
  12. "persian_stop": {
  13. "type": "stop",
  14. "stopwords": "_persian_"
  15. }
  16. },
  17. "analyzer": {
  18. "rebuilt_persian": {
  19. "tokenizer": "standard",
  20. "char_filter": [ "zero_width_spaces" ],
  21. "filter": [
  22. "lowercase",
  23. "decimal_digit",
  24. "arabic_normalization",
  25. "persian_normalization",
  26. "persian_stop"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
zero_width_spaces 文字フィルターは、ゼロ幅非結合文字(U+200C)を ASCII スペースに置き換えます(下記の例も参照)。
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
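
zero_width_spaces 文字フィルターの効果は、ゼロ幅非結合文字(U+200C)を含むテキストを _analyze API に渡すと確認できます。以下は上記の persian_example インデックスが作成済みであることを前提としたスケッチで、入力文字列は動作を分かりやすくするための人工的な例です。

Python

# U+200C がスペースに置き換えられ、standard トークナイザーが 2 つのトークンに分割することを確認するスケッチ
resp = client.indices.analyze(
    index="persian_example",
    analyzer="rebuilt_persian",
    text="foo\u200cbar",  # ゼロ幅非結合文字を挟んだ人工的な例。おおよそ ["foo", "bar"] になる想定
)
print([t["token"] for t in resp["tokens"]])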

ポルトガル語アナライザー

portuguese アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="portuguese_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "portuguese_stop": {
  7. "type": "stop",
  8. "stopwords": "_portuguese_"
  9. },
  10. "portuguese_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplo"
  14. ]
  15. },
  16. "portuguese_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_portuguese"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_portuguese": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "portuguese_stop",
  27. "portuguese_keywords",
  28. "portuguese_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'portuguese_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. portuguese_stop: {
  8. type: 'stop',
  9. stopwords: '_portuguese_'
  10. },
  11. portuguese_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplo'
  15. ]
  16. },
  17. portuguese_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_portuguese'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_portuguese: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'portuguese_stop',
  28. 'portuguese_keywords',
  29. 'portuguese_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "portuguese_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. portuguese_stop: {
  7. type: "stop",
  8. stopwords: "_portuguese_",
  9. },
  10. portuguese_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplo"],
  13. },
  14. portuguese_stemmer: {
  15. type: "stemmer",
  16. language: "light_portuguese",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_portuguese: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "portuguese_stop",
  25. "portuguese_keywords",
  26. "portuguese_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /portuguese_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "portuguese_stop": {
  7. "type": "stop",
  8. "stopwords": "_portuguese_"
  9. },
  10. "portuguese_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplo"]
  13. },
  14. "portuguese_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_portuguese"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_portuguese": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "portuguese_stop",
  25. "portuguese_keywords",
  26. "portuguese_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(portuguese_keywords)を削除してください。

ルーマニア語アナライザー

romanian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="romanian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "romanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_romanian_"
  9. },
  10. "romanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exemplu"
  14. ]
  15. },
  16. "romanian_stemmer": {
  17. "type": "stemmer",
  18. "language": "romanian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_romanian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "romanian_stop",
  27. "romanian_keywords",
  28. "romanian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'romanian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. romanian_stop: {
  8. type: 'stop',
  9. stopwords: '_romanian_'
  10. },
  11. romanian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exemplu'
  15. ]
  16. },
  17. romanian_stemmer: {
  18. type: 'stemmer',
  19. language: 'romanian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_romanian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'romanian_stop',
  28. 'romanian_keywords',
  29. 'romanian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "romanian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. romanian_stop: {
  7. type: "stop",
  8. stopwords: "_romanian_",
  9. },
  10. romanian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exemplu"],
  13. },
  14. romanian_stemmer: {
  15. type: "stemmer",
  16. language: "romanian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_romanian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "romanian_stop",
  25. "romanian_keywords",
  26. "romanian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /romanian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "romanian_stop": {
  7. "type": "stop",
  8. "stopwords": "_romanian_"
  9. },
  10. "romanian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exemplu"]
  13. },
  14. "romanian_stemmer": {
  15. "type": "stemmer",
  16. "language": "romanian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_romanian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "romanian_stop",
  25. "romanian_keywords",
  26. "romanian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(romanian_keywords)を削除してください。

ロシア語アナライザー

russian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="russian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "russian_stop": {
  7. "type": "stop",
  8. "stopwords": "_russian_"
  9. },
  10. "russian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "russian_stemmer": {
  17. "type": "stemmer",
  18. "language": "russian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_russian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "russian_stop",
  27. "russian_keywords",
  28. "russian_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'russian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. russian_stop: {
  8. type: 'stop',
  9. stopwords: '_russian_'
  10. },
  11. russian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. russian_stemmer: {
  18. type: 'stemmer',
  19. language: 'russian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_russian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'russian_stop',
  28. 'russian_keywords',
  29. 'russian_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "russian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. russian_stop: {
  7. type: "stop",
  8. stopwords: "_russian_",
  9. },
  10. russian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. russian_stemmer: {
  15. type: "stemmer",
  16. language: "russian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_russian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "russian_stop",
  25. "russian_keywords",
  26. "russian_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /russian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "russian_stop": {
  7. "type": "stop",
  8. "stopwords": "_russian_"
  9. },
  10. "russian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "russian_stemmer": {
  15. "type": "stemmer",
  16. "language": "russian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_russian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "russian_stop",
  25. "russian_keywords",
  26. "russian_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(russian_keywords)を削除してください。

セルビア語アナライザー

serbian アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="serbian_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "serbian_stop": {
  7. "type": "stop",
  8. "stopwords": "_serbian_"
  9. },
  10. "serbian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "пример"
  14. ]
  15. },
  16. "serbian_stemmer": {
  17. "type": "stemmer",
  18. "language": "serbian"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_serbian": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "serbian_stop",
  27. "serbian_keywords",
  28. "serbian_stemmer",
  29. "serbian_normalization"
  30. ]
  31. }
  32. }
  33. }
  34. },
  35. )
  36. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'serbian_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. serbian_stop: {
  8. type: 'stop',
  9. stopwords: '_serbian_'
  10. },
  11. serbian_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'пример'
  15. ]
  16. },
  17. serbian_stemmer: {
  18. type: 'stemmer',
  19. language: 'serbian'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_serbian: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'serbian_stop',
  28. 'serbian_keywords',
  29. 'serbian_stemmer',
  30. 'serbian_normalization'
  31. ]
  32. }
  33. }
  34. }
  35. }
  36. }
  37. )
  38. puts response

Js

  1. const response = await client.indices.create({
  2. index: "serbian_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. serbian_stop: {
  7. type: "stop",
  8. stopwords: "_serbian_",
  9. },
  10. serbian_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["пример"],
  13. },
  14. serbian_stemmer: {
  15. type: "stemmer",
  16. language: "serbian",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_serbian: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "serbian_stop",
  25. "serbian_keywords",
  26. "serbian_stemmer",
  27. "serbian_normalization",
  28. ],
  29. },
  30. },
  31. },
  32. },
  33. });
  34. console.log(response);

コンソール

  1. PUT /serbian_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "serbian_stop": {
  7. "type": "stop",
  8. "stopwords": "_serbian_"
  9. },
  10. "serbian_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["пример"]
  13. },
  14. "serbian_stemmer": {
  15. "type": "stemmer",
  16. "language": "serbian"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_serbian": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "serbian_stop",
  25. "serbian_keywords",
  26. "serbian_stemmer",
  27. "serbian_normalization"
  28. ]
  29. }
  30. }
  31. }
  32. }
  33. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(serbian_keywords)を削除してください。

ソラニ語アナライザー

sorani アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="sorani_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "sorani_stop": {
  7. "type": "stop",
  8. "stopwords": "_sorani_"
  9. },
  10. "sorani_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "mînak"
  14. ]
  15. },
  16. "sorani_stemmer": {
  17. "type": "stemmer",
  18. "language": "sorani"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_sorani": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "sorani_normalization",
  26. "lowercase",
  27. "decimal_digit",
  28. "sorani_stop",
  29. "sorani_keywords",
  30. "sorani_stemmer"
  31. ]
  32. }
  33. }
  34. }
  35. },
  36. )
  37. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'sorani_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. sorani_stop: {
  8. type: 'stop',
  9. stopwords: '_sorani_'
  10. },
  11. sorani_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'mînak'
  15. ]
  16. },
  17. sorani_stemmer: {
  18. type: 'stemmer',
  19. language: 'sorani'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_sorani: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'sorani_normalization',
  27. 'lowercase',
  28. 'decimal_digit',
  29. 'sorani_stop',
  30. 'sorani_keywords',
  31. 'sorani_stemmer'
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
  38. )
  39. puts response

Js

  1. const response = await client.indices.create({
  2. index: "sorani_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. sorani_stop: {
  7. type: "stop",
  8. stopwords: "_sorani_",
  9. },
  10. sorani_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["mînak"],
  13. },
  14. sorani_stemmer: {
  15. type: "stemmer",
  16. language: "sorani",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_sorani: {
  21. tokenizer: "standard",
  22. filter: [
  23. "sorani_normalization",
  24. "lowercase",
  25. "decimal_digit",
  26. "sorani_stop",
  27. "sorani_keywords",
  28. "sorani_stemmer",
  29. ],
  30. },
  31. },
  32. },
  33. },
  34. });
  35. console.log(response);

コンソール

  1. PUT /sorani_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "sorani_stop": {
  7. "type": "stop",
  8. "stopwords": "_sorani_"
  9. },
  10. "sorani_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["mînak"]
  13. },
  14. "sorani_stemmer": {
  15. "type": "stemmer",
  16. "language": "sorani"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_sorani": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "sorani_normalization",
  24. "lowercase",
  25. "decimal_digit",
  26. "sorani_stop",
  27. "sorani_keywords",
  28. "sorani_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. }
  34. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(sorani_keywords)を削除してください。

スペイン語アナライザー

spanish アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="spanish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "spanish_stop": {
  7. "type": "stop",
  8. "stopwords": "_spanish_"
  9. },
  10. "spanish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "ejemplo"
  14. ]
  15. },
  16. "spanish_stemmer": {
  17. "type": "stemmer",
  18. "language": "light_spanish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_spanish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "spanish_stop",
  27. "spanish_keywords",
  28. "spanish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'spanish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. spanish_stop: {
  8. type: 'stop',
  9. stopwords: '_spanish_'
  10. },
  11. spanish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'ejemplo'
  15. ]
  16. },
  17. spanish_stemmer: {
  18. type: 'stemmer',
  19. language: 'light_spanish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_spanish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'spanish_stop',
  28. 'spanish_keywords',
  29. 'spanish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "spanish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. spanish_stop: {
  7. type: "stop",
  8. stopwords: "_spanish_",
  9. },
  10. spanish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["ejemplo"],
  13. },
  14. spanish_stemmer: {
  15. type: "stemmer",
  16. language: "light_spanish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_spanish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "spanish_stop",
  25. "spanish_keywords",
  26. "spanish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /spanish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "spanish_stop": {
  7. "type": "stop",
  8. "stopwords": "_spanish_"
  9. },
  10. "spanish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["ejemplo"]
  13. },
  14. "spanish_stemmer": {
  15. "type": "stemmer",
  16. "language": "light_spanish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_spanish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "spanish_stop",
  25. "spanish_keywords",
  26. "spanish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(spanish_keywords)を削除してください。

スウェーデン語アナライザー

swedish アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="swedish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "swedish_stop": {
  7. "type": "stop",
  8. "stopwords": "_swedish_"
  9. },
  10. "swedish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": [
  13. "exempel"
  14. ]
  15. },
  16. "swedish_stemmer": {
  17. "type": "stemmer",
  18. "language": "swedish"
  19. }
  20. },
  21. "analyzer": {
  22. "rebuilt_swedish": {
  23. "tokenizer": "standard",
  24. "filter": [
  25. "lowercase",
  26. "swedish_stop",
  27. "swedish_keywords",
  28. "swedish_stemmer"
  29. ]
  30. }
  31. }
  32. }
  33. },
  34. )
  35. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'swedish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. swedish_stop: {
  8. type: 'stop',
  9. stopwords: '_swedish_'
  10. },
  11. swedish_keywords: {
  12. type: 'keyword_marker',
  13. keywords: [
  14. 'exempel'
  15. ]
  16. },
  17. swedish_stemmer: {
  18. type: 'stemmer',
  19. language: 'swedish'
  20. }
  21. },
  22. analyzer: {
  23. rebuilt_swedish: {
  24. tokenizer: 'standard',
  25. filter: [
  26. 'lowercase',
  27. 'swedish_stop',
  28. 'swedish_keywords',
  29. 'swedish_stemmer'
  30. ]
  31. }
  32. }
  33. }
  34. }
  35. }
  36. )
  37. puts response

Js

  1. const response = await client.indices.create({
  2. index: "swedish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. swedish_stop: {
  7. type: "stop",
  8. stopwords: "_swedish_",
  9. },
  10. swedish_keywords: {
  11. type: "keyword_marker",
  12. keywords: ["exempel"],
  13. },
  14. swedish_stemmer: {
  15. type: "stemmer",
  16. language: "swedish",
  17. },
  18. },
  19. analyzer: {
  20. rebuilt_swedish: {
  21. tokenizer: "standard",
  22. filter: [
  23. "lowercase",
  24. "swedish_stop",
  25. "swedish_keywords",
  26. "swedish_stemmer",
  27. ],
  28. },
  29. },
  30. },
  31. },
  32. });
  33. console.log(response);

コンソール

  1. PUT /swedish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "swedish_stop": {
  7. "type": "stop",
  8. "stopwords": "_swedish_"
  9. },
  10. "swedish_keywords": {
  11. "type": "keyword_marker",
  12. "keywords": ["exempel"]
  13. },
  14. "swedish_stemmer": {
  15. "type": "stemmer",
  16. "language": "swedish"
  17. }
  18. },
  19. "analyzer": {
  20. "rebuilt_swedish": {
  21. "tokenizer": "standard",
  22. "filter": [
  23. "lowercase",
  24. "swedish_stop",
  25. "swedish_keywords",
  26. "swedish_stemmer"
  27. ]
  28. }
  29. }
  30. }
  31. }
  32. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(swedish_keywords)を削除してください。

トルコ語アナライザー

turkish アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="turkish_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "turkish_stop": {
  7. "type": "stop",
  8. "stopwords": "_turkish_"
  9. },
  10. "turkish_lowercase": {
  11. "type": "lowercase",
  12. "language": "turkish"
  13. },
  14. "turkish_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": [
  17. "örnek"
  18. ]
  19. },
  20. "turkish_stemmer": {
  21. "type": "stemmer",
  22. "language": "turkish"
  23. }
  24. },
  25. "analyzer": {
  26. "rebuilt_turkish": {
  27. "tokenizer": "standard",
  28. "filter": [
  29. "apostrophe",
  30. "turkish_lowercase",
  31. "turkish_stop",
  32. "turkish_keywords",
  33. "turkish_stemmer"
  34. ]
  35. }
  36. }
  37. }
  38. },
  39. )
  40. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'turkish_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. turkish_stop: {
  8. type: 'stop',
  9. stopwords: '_turkish_'
  10. },
  11. turkish_lowercase: {
  12. type: 'lowercase',
  13. language: 'turkish'
  14. },
  15. turkish_keywords: {
  16. type: 'keyword_marker',
  17. keywords: [
  18. 'örnek'
  19. ]
  20. },
  21. turkish_stemmer: {
  22. type: 'stemmer',
  23. language: 'turkish'
  24. }
  25. },
  26. analyzer: {
  27. rebuilt_turkish: {
  28. tokenizer: 'standard',
  29. filter: [
  30. 'apostrophe',
  31. 'turkish_lowercase',
  32. 'turkish_stop',
  33. 'turkish_keywords',
  34. 'turkish_stemmer'
  35. ]
  36. }
  37. }
  38. }
  39. }
  40. }
  41. )
  42. puts response

Js

  1. const response = await client.indices.create({
  2. index: "turkish_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. turkish_stop: {
  7. type: "stop",
  8. stopwords: "_turkish_",
  9. },
  10. turkish_lowercase: {
  11. type: "lowercase",
  12. language: "turkish",
  13. },
  14. turkish_keywords: {
  15. type: "keyword_marker",
  16. keywords: ["örnek"],
  17. },
  18. turkish_stemmer: {
  19. type: "stemmer",
  20. language: "turkish",
  21. },
  22. },
  23. analyzer: {
  24. rebuilt_turkish: {
  25. tokenizer: "standard",
  26. filter: [
  27. "apostrophe",
  28. "turkish_lowercase",
  29. "turkish_stop",
  30. "turkish_keywords",
  31. "turkish_stemmer",
  32. ],
  33. },
  34. },
  35. },
  36. },
  37. });
  38. console.log(response);

コンソール

  1. PUT /turkish_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "turkish_stop": {
  7. "type": "stop",
  8. "stopwords": "_turkish_"
  9. },
  10. "turkish_lowercase": {
  11. "type": "lowercase",
  12. "language": "turkish"
  13. },
  14. "turkish_keywords": {
  15. "type": "keyword_marker",
  16. "keywords": ["örnek"]
  17. },
  18. "turkish_stemmer": {
  19. "type": "stemmer",
  20. "language": "turkish"
  21. }
  22. },
  23. "analyzer": {
  24. "rebuilt_turkish": {
  25. "tokenizer": "standard",
  26. "filter": [
  27. "apostrophe",
  28. "turkish_lowercase",
  29. "turkish_stop",
  30. "turkish_keywords",
  31. "turkish_stemmer"
  32. ]
  33. }
  34. }
  35. }
  36. }
  37. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
ステミングから除外すべき単語がない場合は、keyword_marker フィルター(turkish_keywords)を削除してください。
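
apostrophe トークンフィルターは、アポストロフィとそれ以降の文字(トルコ語の格接尾辞など)を取り除きます。以下は上記の turkish_example インデックスが作成済みであることを前提とした確認用のスケッチです(入力と期待されるトークンは説明用の想定で、正確な出力はステマーの挙動に依存します)。

Python

# "İstanbul'a" のアポストロフィ以降が除去され、turkish_lowercase で小文字化されることを確認するスケッチ
resp = client.indices.analyze(
    index="turkish_example",
    analyzer="rebuilt_turkish",
    text="İstanbul'a",  # おおよそ ["istanbul"] になる想定(説明用)
)
print([t["token"] for t in resp["tokens"]])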

タイ語アナライザー

thai アナライザーは、custom アナライザーとして次のように再実装できます:

Python

  1. resp = client.indices.create(
  2. index="thai_example",
  3. settings={
  4. "analysis": {
  5. "filter": {
  6. "thai_stop": {
  7. "type": "stop",
  8. "stopwords": "_thai_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_thai": {
  13. "tokenizer": "thai",
  14. "filter": [
  15. "lowercase",
  16. "decimal_digit",
  17. "thai_stop"
  18. ]
  19. }
  20. }
  21. }
  22. },
  23. )
  24. print(resp)

Ruby

  1. response = client.indices.create(
  2. index: 'thai_example',
  3. body: {
  4. settings: {
  5. analysis: {
  6. filter: {
  7. thai_stop: {
  8. type: 'stop',
  9. stopwords: '_thai_'
  10. }
  11. },
  12. analyzer: {
  13. rebuilt_thai: {
  14. tokenizer: 'thai',
  15. filter: [
  16. 'lowercase',
  17. 'decimal_digit',
  18. 'thai_stop'
  19. ]
  20. }
  21. }
  22. }
  23. }
  24. }
  25. )
  26. puts response

Js

  1. const response = await client.indices.create({
  2. index: "thai_example",
  3. settings: {
  4. analysis: {
  5. filter: {
  6. thai_stop: {
  7. type: "stop",
  8. stopwords: "_thai_",
  9. },
  10. },
  11. analyzer: {
  12. rebuilt_thai: {
  13. tokenizer: "thai",
  14. filter: ["lowercase", "decimal_digit", "thai_stop"],
  15. },
  16. },
  17. },
  18. },
  19. });
  20. console.log(response);

コンソール

  1. PUT /thai_example
  2. {
  3. "settings": {
  4. "analysis": {
  5. "filter": {
  6. "thai_stop": {
  7. "type": "stop",
  8. "stopwords": "_thai_"
  9. }
  10. },
  11. "analyzer": {
  12. "rebuilt_thai": {
  13. "tokenizer": "thai",
  14. "filter": [
  15. "lowercase",
  16. "decimal_digit",
  17. "thai_stop"
  18. ]
  19. }
  20. }
  21. }
  22. }
  23. }
デフォルトのストップワードは、stopwords または stopwords_path パラメーターで上書きできます。
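
thai トークナイザーは、空白で区切られないタイ語のテキストを単語単位に分割します。以下は上記の thai_example インデックスが作成済みであることを前提とした確認用のスケッチです(入力文字列と期待される分割結果は説明用の想定です)。

Python

# タイ語テキストが単語単位に分割されることを確認するスケッチ
resp = client.indices.analyze(
    index="thai_example",
    analyzer="rebuilt_thai",
    text="ภาษาไทย",  # 「タイ語」を意味する例。おおよそ ["ภาษา", "ไทย"] に分割される想定
)
print([t["token"] for t in resp["tokens"]])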