基于java环境,基于Lucene之上包装一层外壳
Lucene是一个java的搜索引擎库,操作非常繁琐
全文检索和倒排索引:
数据库里的标题:
1.老男孩教育
2.老男孩教育linux学院
3.老男孩教育python学院
4.老男孩教育DBA
5.老男孩教育oldzhang
ES内部分词,评分,倒排索引:
老男孩 1 2 3 4 5
教育 1 2 3 4 5
学院 2 3
linux 2
python 3
DBA 4
用户输入:
老男孩学院
linux老男孩学院DBA
1.搜索: 电商,百科,app搜索
2.高亮显示: github
3.日志分析和数据挖掘: EBLK
1.高性能,天然分布式
2.对运维友好,不需要会java语言,开箱即用
3.功能丰富
mysql:
skuid name
1 狗粮100kg
2 猫粮50kg
3 猫罐头200g
ES:
聚合运算之后得到SKUID:
1
2
拿到ID之后,mysql就只需要简单地where查询即可
mysql:
select xx from xxx where skuid=1
iptables -nL
iptables -F
iptables -X
iptables -Z
iptables -nL
mkdir /data/soft
[root@db-01 /data/soft]# ll -h
total 268M
-rw-r--r-- 1 root root 109M Feb 25 2019 elasticsearch-6.6.0.rpm
-rw-r--r-- 1 root root 159M Sep 2 16:35 jdk-8u102-linux-x64.rpm
rpm -ivh jdk-8u102-linux-x64.rpm
[root@db-01 /data/soft]# java -version
openjdk version "1.8.0_212"
OpenJDK Runtime Environment (build 1.8.0_212-b04)
OpenJDK 64-Bit Server VM (build 25.212-b04, mixed mode)
rpm -ivh elasticsearch-6.6.0.rpm
systemctl daemon-reload
systemctl enable elasticsearch.service
systemctl start elasticsearch.service
netstat -lntup|grep 9200
[root@db01 /data/soft]# curl 127.0.0.1:9200
{
"name" : "pRG0qLR",
"cluster_name" : "elasticsearch",
"cluster_uuid" : "mNuJSe07QM61IOxecnanZg",
"version" : {
"number" : "6.6.0",
"build_flavor" : "default",
"build_type" : "rpm",
"build_hash" : "a9861f4",
"build_date" : "2019-01-24T11:27:09.439740Z",
"build_snapshot" : false,
"lucene_version" : "7.6.0",
"minimum_wire_compatibility_version" : "5.6.0",
"minimum_index_compatibility_version" : "5.0.0"
},
"tagline" : "You Know, for Search"
}
[root@db01 ~]# rpm -qc elasticsearch
/etc/elasticsearch/elasticsearch.yml #ES的主配置文件
/etc/elasticsearch/jvm.options #jvm虚拟机配置
/etc/sysconfig/elasticsearch #默认一些系统配置参数
/usr/lib/sysctl.d/elasticsearch.conf #配置参数,不需要改动
/usr/lib/systemd/system/elasticsearch.service #system启动文件
cp /etc/elasticsearch/elasticsearch.yml /opt/
cat >/etc/elasticsearch/elasticsearch.yml<<EOF
node.name: node-1
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
bootstrap.memory_lock: true
network.host: 10.0.0.51,127.0.0.1
http.port: 9200
EOF
systemctl restart elasticsearch.service
重启后查看日志发现提示内存锁定失败
[root@db01 ~]# tail -f /var/log/elasticsearch/elasticsearch.log
[2019-11-14T09:42:29,513][ERROR][o.e.b.Bootstrap ] [node-1] node validation exception
[1] bootstrap checks failed
[1]: memory locking requested for elasticsearch process but memory is not locked
解决方案:
官网参考地址:
https://www.elastic.co/guide/en/elasticsearch/reference/current/setting-system-settings.html#systemd
解决命令:
systemctl edit elasticsearch
[Service]
LimitMEMLOCK=infinity
systemctl daemon-reload
systemctl restart elasticsearch.service
注意:需要修改配置文件添加允许跨域参数
http.cors.enabled: true
http.cors.allow-origin: "*"
1.npm安装方式
2.docker安装
3.google浏览器插件(推荐)
从google商店安装es-head插件
将安装好的插件导出到本地
修改插件文件名为zip后缀
解压目录
拓展程序-开发者模式-打开已解压的目录
连接地址修改为ES的IP地址
Head插件在5.0以后安装方式发生了改变,需要nodejs环境支持,或者直接使用别人封装好的docker镜像
插件官方地址
https://github.com/mobz/elasticsearch-head
使用docker部署elasticsearch-head
docker pull alivv/elasticsearch-head
docker run --name es-head -p 9100:9100 -dit alivv/elasticsearch-head
使用nodejs编译安装elasticsearch-head
cd /opt/
wget https://nodejs.org/dist/v12.13.0/node-v12.13.0-linux-x64.tar.xz
tar xf node-v12.13.0-linux-x64.tar.xz
mv node-v12.13.0-linux-x64 node
echo 'export PATH=$PATH:/opt/node/bin' >> /etc/profile
source /etc/profile
npm -v
node -v
git clone https://github.com/mobz/elasticsearch-head.git
cd elasticsearch-head
npm install -g cnpm --registry=https://registry.npm.taobao.org
cnpm install
cnpm run start &
rpm -ivh kibana-6.6.0-x86_64.rpm
[root@db-01 /data/soft]# grep "^[a-zA-Z]" /etc/kibana/kibana.yml
server.port: 5601
server.host: "10.0.0.51"
elasticsearch.hosts: ["http://localhost:9200"]
kibana.index: ".kibana"
systemctl start kibana
curl -XGET 'http://10.0.0.51:9200/_count?pretty' -H 'Content-Type: application/json' -d '
{
"query": { "match_all": {}
}
}
'
https://www.elastic.co/guide/en/elasticsearch/reference/6.6/docs.html
PUT oldzhang/info/1
{
"name": "zhang",
"age": "29"
}
POST oldzhang/info/
{
"name": "zhang",
"age": "29",
"pet": "xiaoqi"
}
mysql
id name age pet job
1 zhang 27 xiaoqi it
2 ya 22 xiaoqi it
POST linux5/_doc/
{
"id": "1",
"name": "zhang",
"age": "29",
"pet": "xiaoqi",
"job": "it"
}
POST linux5/_doc/
{
"id": "2",
"name": "ya",
"age": "22",
"pet": "xiaoqi",
"job": "it"
}
POST linux5/_doc/
{
"name": "zhang",
"age": "29",
"pet": "xiaoqi",
"job": "it"
}
POST linux5/_doc/
{
"name": "json",
"age": "30",
"pet": "mimi",
"job": "it"
}
POST linux5/_doc/
{
"name": "jl",
"age": "26",
"pet": "10yue",
"job": "it"
}
POST linux5/_doc/
{
"name": "zzy",
"age": "35",
"pet": "csgo",
"job": "it"
}
POST linux5/_doc/
{
"name": "ya",
"age": "28",
"pet": "xiaomin",
"job": "it"
}
POST linux5/_doc/
{
"name": "scj",
"age": "26",
"pet": "scj",
"job": "SM"
}
POST linux5/_doc/
{
"name": "xiaozhang",
"age": "38",
"pet": "mysun",
"job": "3P"
}
GET oldzhang/_search/
GET oldzhang/_search
{
"query": {
"term": {
"name": {
"value": "xiaomin"
}
}
}
}
GET oldzhang/_search
{
"query": {
"term": {
"job": {
"value": "it"
}
}
}
}
GET /oldzhang/_search
{
"query" : {
"bool": {
"must": [
{"match": {"pet": "xiaoqi"}},
{"match": {"name": "zhang"}}
],
"filter": {
"range": {
"age": {
"gte": 27,
"lte": 30
}
}
}
}
}
}
PUT oldzhang/info/1
{
"name": "zhang",
"age": 30,
"job": "it",
"id": 1
}
创建测试数据
PUT oldzhang/info/1
{
"name": "zhang",
"age": 30,
"job": "it",
"id": 2
}
先根据自定义的Id字段查出数据的随机ID
GET oldzhang/_search/
{
"query": {
"term": {
"id": {
"value": "2"
}
}
}
}
取到随机ID后更改数据
PUT oldzhang/info/CVDdknIBq3aq7mPQaoWw
{
"name": "yayay",
"age": 30,
"job": "it",
"id": 2
}
1分片
1副本
绿色: 所有数据都完整,且副本数满足
黄色: 所有数据都完整,但是副本数不满足
红色: 一个或多个索引数据不完整
主节点: 负责调度数据分配到哪个节点
数据节点: 实际负责处理数据的节点
默认: 主节点也是工作节点
主分片: 实际存储的数据,负责读写,粗框的是主分片
副本分片: 主分片的副本,提供读,同步主分片,细框的是副本分片
主分片的备份,副本数量可以自定义
rpm -ivh jdk-8u102-linux-x64.rpm
rpm -ivh elasticsearch-6.6.0.rpm
systemctl edit elasticsearch.service
[Service]
LimitMEMLOCK=infinity
cat > /etc/elasticsearch/elasticsearch.yml <<EOF
cluster.name: linux5
node.name: node-1
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
bootstrap.memory_lock: true
network.host: 127.0.0.1,10.0.0.51
http.port: 9200
discovery.zen.ping.unicast.hosts: ["10.0.0.51","10.0.0.52"]
discovery.zen.minimum_master_nodes: 1
EOF
cat> /etc/elasticsearch/elasticsearch.yml <<EOF
cluster.name: linux5
node.name: node-2
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
bootstrap.memory_lock: true
network.host: 127.0.0.1,10.0.0.52
http.port: 9200
discovery.zen.ping.unicast.hosts: ["10.0.0.51","10.0.0.52"]
discovery.zen.minimum_master_nodes: 1
EOF
systemctl daemon-reload
systemctl restart elasticsearch
tail -f /var/log/elasticsearch/linux5.log
ES-head查看是否有2个节点
1.插入和读取数据在任意节点都可以执行,效果一样
2.es-head可以连接集群内任一台服务
3.主节点负责读写
如果主分片所在的节点坏掉了,副本分片会升为主分片
4.主节点负责调度
如果主节点坏掉了,数据节点会自动升为主节点
5.通讯端口
默认会有2个通讯端口:9200和9300
9300并没有在配置文件里配置过
如果开启了防火墙并且没有放开9300端口,那么集群通讯就会失败
GET _cat/nodes
GET _cat/health
GET _cat/master
GET _cat/fielddata
GET _cat/indices
GET _cat/shards
GET _cat/shards/oldzhang
rpm -ivh jdk-8u102-linux-x64.rpm
rpm -ivh elasticsearch-6.6.0.rpm
systemctl edit elasticsearch.service
[Service]
LimitMEMLOCK=infinity
cat > /etc/elasticsearch/elasticsearch.yml <<EOF
cluster.name: linux5
node.name: node-3
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
bootstrap.memory_lock: true
network.host: 10.0.0.53,127.0.0.1
http.port: 9200
discovery.zen.ping.unicast.hosts: ["10.0.0.51", "10.0.0.53"]
discovery.zen.minimum_master_nodes: 2
EOF
1.对于新添加的节点来说:
只需要知道集群内任意一个节点的IP和他自己本身的IP即可
对于以前的节点来说:
什么都不需要更改
2.最大master节点数设置
3个节点,设置为2
3.默认创建索引为1副本5分片
4.数据分配的时候会出现几种颜色
紫色: 正在迁移
黄色: 正在复制
绿色: 正常
5.3个节点的ES集群,极限情况下最多允许坏几台
0副本一台都不能坏
1副本的极限情况下可以坏2台: 1台1台的坏,不能同时坏2台,在数据复制完成的情况下,可以坏2台
2副本的情况可以同时坏2台
6.集群改单台节点
systemctl stop elasticsearch
rm -rf /var/lib/elasticsearch/*
cat > /etc/elasticsearch/elasticsearch.yml << 'EOF'
node.name: node-1
path.data: /var/lib/elasticsearch
path.logs: /var/log/elasticsearch
network.host: 127.0.0.1,10.0.0.51
http.port: 9200
discovery.zen.ping.unicast.hosts: ["10.0.0.51"]
discovery.zen.minimum_master_nodes: 1
EOF
systemctl start elasticsearch
GET _cluster/settings
PUT _cluster/settings
{
"transient": {
"discovery.zen.minimum_master_nodes": 2
}
}
索引一旦建立完成,分片数就不可以修改了
但是副本数可以随时修改
PUT /yayayaay/
{
"settings": {
"number_of_shards": 3,
"number_of_replicas": 0
}
}
PUT /oldzhang/_settings/
{
"settings": {
"number_of_replicas": 0
}
}
PUT /_all/_settings/
{
"settings": {
"number_of_replicas": 0
}
}
2个节点: 默认就可以
3个节点: 重要的数据,2副本 不重要的默认
日志收集: 1副本3分片
0.不能只监控集群状态
1.监控节点数
2.监控集群状态
3.两者任意一个发生改变了都报警
查看集群健康状态
curl -s 127.0.0.1:9200/_cat/health|grep "green"|wc -l
查看节点个数
curl -s 127.0.0.1:9200/_cat/nodes|wc -l
点击kibana面板的监控按钮
GET /_cluster/settings
PUT /_cluster/settings
{
"persistent" : {
"xpack" : {
"monitoring" : {
"collection" : {
"enabled" : "false"
}
}
}
}
}
官方参考:
https://www.elastic.co/guide/en/elasticsearch/reference/current/system-config.html
总结:
1.内存
1.系统建议预留一半
2.每个ES节点不要超过32G
3.关闭swap分区
4.配置文件打开内存锁定参数
48内存
系统留一半: 24G
自己留一半: 24G
8G 12G 16G 24G 30G
2.SSD硬盘
0 10 不做RAID
3.代码规范
4.升级大版本
POST /news/txt/1
{"content":"美国留给伊拉克的是个烂摊子吗"}
POST /news/txt/2
{"content":"公安部:各地校车将享最高路权"}
POST /news/txt/3
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
POST /news/txt/4
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
POST /news/_search
{
"query" : { "match" : { "content" : "中国" }},
"highlight" : {
"pre_tags" : ["<tag1>", "<tag2>"],
"post_tags" : ["</tag1>", "</tag2>"],
"fields" : {
"content" : {}
}
}
}
所有的ES节点都需要安装
所有的ES都需要重启才能生效
中文分词器的版本号要和ES版本号对应
在线安装
cd /usr/share/elasticsearch
./bin/elasticsearch-plugin install https://github.com/medcl/elasticsearch-analysis-ik/releases/download/v6.6.0/elasticsearch-analysis-ik-6.6.0.zip
离线本地文件安装
/usr/share/elasticsearch/bin/elasticsearch-plugin install file:///opt/elasticsearch-analysis-ik-6.6.0.zip
PUT /news2
POST /news2/text/_mapping
{
"properties": {
"content": {
"type": "text",
"analyzer": "ik_max_word",
"search_analyzer": "ik_smart"
}
}
}
POST /news2/text/1
{"content":"美国留给伊拉克的是个烂摊子吗"}
POST /news2/text/2
{"content":"公安部:各地校车将享最高路权"}
POST /news2/text/3
{"content":"中韩渔警冲突调查:韩警平均每天扣1艘中国渔船"}
POST /news2/text/4
{"content":"中国驻洛杉矶领事馆遭亚裔男子枪击 嫌犯已自首"}
POST /news2/_search
{
"query" : { "match" : { "content" : "中国" }},
"highlight" : {
"pre_tags" : ["<tag1>", "<tag2>"],
"post_tags" : ["</tag1>", "</tag2>"],
"fields" : {
"content" : {}
}
}
}
yum install nginx -y
cat >>/usr/share/nginx/html/my_dic.txt<<EOF
北京
张亚
武汉
中国
EOF
nginx -t
systemctl restart nginx
curl 127.0.0.1/my_dic.txt
cat >/etc/elasticsearch/analysis-ik/IKAnalyzer.cfg.xml<<'EOF'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE properties SYSTEM "http://java.sun.com/dtd/properties.dtd">
<properties>
<comment>IK Analyzer 扩展配置</comment>
<!--用户可以在这里配置自己的扩展字典 -->
<entry key="ext_dict"></entry>
<!--用户可以在这里配置自己的扩展停止词字典-->
<entry key="ext_stopwords"></entry>
<!--用户可以在这里配置远程扩展字典 -->
<entry key="remote_ext_dict">http://10.0.0.51/my_dic.txt</entry>
<!--用户可以在这里配置远程扩展停止词字典-->
<!-- <entry key="remote_ext_stopwords">words_location</entry> -->
</properties>
EOF
cd /etc/elasticsearch/analysis-ik/
scp IKAnalyzer.cfg.xml 10.0.0.52:/etc/elasticsearch/analysis-ik/
systemctl restart elasticsearch.service
[2020-02-12T14:56:38,610][INFO ][o.w.a.d.Monitor ] [node-1] 重新加载词典...
[2020-02-12T14:56:38,611][INFO ][o.w.a.d.Monitor ] [node-1] try load config from /etc/elasticsearch/analysis-ik/IKAnalyzer.cfg.xml
[2020-02-12T14:56:38,614][INFO ][o.w.a.d.Monitor ] [node-1] [Dict Loading] http://10.0.0.51/my_dic.txt
[2020-02-12T14:56:38,628][INFO ][o.w.a.d.Monitor ] [node-1] 上海
[2020-02-12T14:56:38,629][INFO ][o.w.a.d.Monitor ] [node-1] 班长
[2020-02-12T14:56:38,629][INFO ][o.w.a.d.Monitor ] [node-1] 学委
[2020-02-12T14:56:38,629][INFO ][o.w.a.d.Monitor ] [node-1] 张亚
[2020-02-12T14:56:38,629][INFO ][o.w.a.d.Monitor ] [node-1] 胖虎
[2020-02-12T14:56:38,629][INFO ][o.w.a.d.Monitor ] [node-1] 重新加载词典完毕...
echo "武汉加油" >> /usr/share/nginx/html/my_dic.txt
POST /news2/text/7
{"content":"武汉加油!"}
POST /news2/_search
{
"query" : { "match" : { "content" : "武汉" }},
"highlight" : {
"pre_tags" : ["<tag1>", "<tag2>"],
"post_tags" : ["</tag1>", "</tag2>"],
"fields" : {
"content" : {}
}
}
}
先把新上架的商品的关键词更新到词典里
查看ES日志,确认新词被动态更新了
自己编写一个测试索引,插入测试数据,然后查看搜索结果
确认没有问题之后,在让开发插入新商品的数据
测试
https://www.elastic.co/guide/en/elasticsearch/reference/6.6/modules-snapshots.html
mkdir /data/backup -p
chown -R elasticsearch:elasticsearch /data/backup/
path.repo: ["/data/backup"]
systemctl restart elasticsearch
PUT /_snapshot/my_fs_backup
{
"type": "fs",
"settings": {
"location": "/data/backup/my_fs_backup_location",
"compress": true
}
}
GET /_snapshot/my_fs_backup
PUT /_snapshot/my_fs_backup/snapshot_1?wait_for_completion=true
PUT /_snapshot/my_fs_backup/snapshot_2?wait_for_completion=true
{
"indices": "news,news2",
"ignore_unavailable": true,
"include_global_state": false
}
GET /_snapshot/my_fs_backup/snapshot_1
GET /_snapshot/my_fs_backup/snapshot_2
GET /_snapshot/my_fs_backup/_current
DELETE /_snapshot/my_fs_backup/snapshot_2
DELETE /_snapshot/my_fs_backup
POST /_snapshot/my_fs_backup/snapshot_1/_restore
POST /_snapshot/my_fs_backup/snapshot_1/_restore
{
"indices": "news,news2",
"ignore_unavailable": true,
"include_global_state": true,
"rename_pattern": "news_(.+)",
"rename_replacement": "restored_news_$1"
}
POST /_snapshot/my_fs_backup/snapshot_1/_restore
{
"indices": "index_1",
"index_settings": {
"index.number_of_replicas": 0
},
"ignore_index_settings": [
"index.refresh_interval"
]
}
PUT /_snapshot/my_fs_backup/%3Csnapshot-%7Bnow%2Fd%7D%3E
GET /_snapshot/my_fs_backup/_all
需要node环境
npm -v
node -v
https://nodejs.org/dist/v10.16.3/node-v10.16.3-linux-x64.tar.xz
tar xf node-v10.16.3-linux-x64.tar.xz -C /opt/
ln -s /opt/node-v10.16.3-linux-x64 /opt/node
echo 'export PATH=/opt/node/bin:$PATH' >> /etc/profile
source /etc/profile
npm -v
node -v
npm install -g cnpm --registry=https://registry.npm.taobao.org
cnpm install elasticdump -g
备份成可读的json格式
elasticdump \
--input=http://10.0.0.51:9200/news2 \
--output=/data/news2.json \
--type=data
备份成压缩格式
elasticdump \
--input=http://10.0.0.51:9200/news2 \
--output=$ | gzip > /data/news2.json.gz
备份分词器/mapping/数据一条龙服务
elasticdump \
--input=http://10.0.0.51:9200/news2 \
--output=/data/news2_analyzer.json \
--type=analyzer
elasticdump \
--input=http://10.0.0.51:9200/news2 \
--output=/data/news2_mapping.json \
--type=mapping
elasticdump \
--input=http://10.0.0.51:9200/news2 \
--output=/data/news2.json \
--type=data
只恢复数据
elasticdump \
--input=/data/news2.json \
--output=http://10.0.0.51:9200/news2
恢复所有数据包含分词器/mapping一条龙
elasticdump \
--input=/data/news2_analyzer.json \
--output=http://10.0.0.51:9200/news2 \
--type=analyzer
elasticdump \
--input=/data/news2_mapping.json \
--output=http://10.0.0.51:9200/news2 \
--type=mapping
elasticdump \
--input=/data/news2.json \
--output=http://10.0.0.51:9200/news2 \
--type=data
curl -s 10.0.0.52:9200/_cat/indices|awk '{print $3}'|grep -v "^\."
1.如果恢复的时候数据冲突了,会被覆盖掉
2.如果已经存在备份文件里没有的数据,会保留下来
--input=http://name:password@production.es.com:9200/my_index
上一篇:c语言笔记
还没有评论,快来说点什么吧~