在超密集型课程“ELK”开课前夕,我们为您准备了一篇实用文章的译文。
Twitter — ? , . Elastic Stack Twitter . Logstash . Kafka Connect, , , Logstash ( Apache Kafka) .
:
Elasticsearch Logstash
Kibana ( Xbox PlayStation)
HTML-
Elastic Search
Docker Compose. Elasticsearch, Logstash.
# Single-node Elastic Stack (Elasticsearch + Kibana + Logstash), all on 7.9.2.
version: '3.3'

services:
  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch:7.9.2
    # Declared once: the original listed `restart` twice in this service,
    # which is a duplicate mapping key.
    restart: unless-stopped
    environment:
      - discovery.type=single-node
      - bootstrap.memory_lock=true
      - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
    ulimits:
      # Unlimited memlock goes together with bootstrap.memory_lock=true above.
      memlock:
        soft: -1
        hard: -1
    volumes:
      - esdata:/usr/share/elasticsearch/data
    ports:
      # Quoted so the host:container pair is always read as a string.
      - "9200:9200"

  kibana:
    image: docker.elastic.co/kibana/kibana:7.9.2
    restart: unless-stopped
    depends_on:
      - elasticsearch
    ports:
      - "5601:5601"

  logstash:
    image: docker.elastic.co/logstash/logstash:7.9.2
    restart: unless-stopped
    depends_on:
      - elasticsearch
    environment:
      LS_JAVA_OPTS: "-Xmx256m -Xms256m"
    volumes:
      # The local ./pipeline directory is mounted as the Logstash pipeline dir.
      - "./pipeline:/usr/share/logstash/pipeline"

volumes:
  # Named volume backing the Elasticsearch data directory.
  esdata:
    driver: local
Logstash
# Pipeline: read tweets from the Twitter input and index them into Elasticsearch.
input {
  twitter {
    # Tweets are selected by these keywords.
    keywords => ["XboxSeriesX", "PS5"]
    full_tweet => false
    codec => "json"

    # Twitter API credentials (the values below are placeholders).
    consumer_key => "loremipsum"
    consumer_secret => "loremipsum"
    oauth_token => "loremipsum-loremipsum"
    oauth_token_secret => "loremipsum"
  }
}

output {
  elasticsearch {
    # Events are written to the "tweets" index on the elasticsearch host.
    index => "tweets"
    hosts => ["elasticsearch:9200"]
  }
}
, Twitter. « ».
. keywords
. , full_tweet value
true
.
docker-compose up -d
tweets
. . 430 , . , . , .
, tweets
. Kibana, .
— Xbox PlayStation
hashtags.text.keyword
. PS5, , , .
— Xbox PlayStation
, PlayStation , Xbox. , . PS5
, — ps5
, .
, . ? . , Terms.
, . , . Filter KQL — Lucene, .
hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)
hashtags.text.keyword: (XboxSeriesX OR Xbox OR XboxSeriesS OR xbox)
. , PlayStation Twitter.
Timelion
XBOX PLAYSTATION
Timelion. . .
. , .
.es(q='hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)', index=tweets)
  .label("PS"),
.es(q='hashtags.text.keyword: (XboxSeriesX OR Xbox OR XboxSeriesS OR xbox)', index=tweets)
  .label("XBOX")
Timelion . PlayStation . , .
.es(q='hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)', index=tweets)
  .label("PS"),
.es(q='hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)', index=tweets, offset=-1d)
  .label("PS -1 day")
()
, .
.es(q='hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)', index=tweets)
  .subtract(
    .es(q='hashtags.text.keyword: (PS5 OR ps5 OR PlayStation5 OR PlayStation)', index=tweets, offset=-1h)
  )
  .label("PS 1h delta"),
.es(q='hashtags.text.keyword: (XboxSeriesX OR Xbox OR XboxSeriesS OR xbox)', index=tweets)
  .subtract(
    .es(q='hashtags.text.keyword: (XboxSeriesX OR Xbox OR XboxSeriesS OR xbox)', index=tweets, offset=-1h)
  )
  .label("XBOX 1h delta")
—
, . , , - . HTML-, .
Elasticsearch . , html_strip HTML-. , , text
, keyword
. .
, tweets
. html_strip , . . Kibana.
# The index is closed before its analysis settings are changed.
POST tweets/_close

# Define a char filter that replaces an <a ...>text</a> element with its
# inner text, and a custom normalizer that applies that char filter.
PUT tweets/_settings
{
  "analysis": {
    "normalizer": {
      "client_extractor_normalizer": {
        "type": "custom",
        "char_filter": ["client_extractor"]
      }
    },
    "char_filter": {
      "client_extractor": {
        "type": "pattern_replace",
        "pattern": "<a[^>]+>([^<]+)</a>",
        "replacement": "$1"
      }
    }
  }
}

# Reopen the index once the settings are in place.
POST tweets/_open
, .
# Map `client` as text with two keyword sub-fields: `client.keyword` (raw) and
# `client.value`, which is passed through client_extractor_normalizer.
PUT tweets/_mapping
{
  "properties": {
    "client": {
      "type": "text",
      "fields": {
        "value": {
          "type": "keyword",
          "normalizer": "client_extractor_normalizer"
        },
        "keyword": {
          "type": "keyword",
          "ignore_above": 256
        }
      }
    }
  }
}
, . (, , , MongoDB? ). Update By Query.
# Run update-by-query over the tweets index with no request body.
# wait_for_completion=false: the request does not wait for the update to finish.
# conflicts=proceed: version conflicts do not abort the run.
POST tweets/_update_by_query?wait_for_completion=false&conflicts=proceed
task id. , . GET _cat/tasks?v
.
Kibana . , iPhone Android. Bot Xbox Series X
.
?
Spark NLP, , , Twitter. Spark NLP , Spark Structured Streaming.