diff options
| author | wangchengcheng <[email protected]> | 2023-07-27 15:43:51 +0800 |
|---|---|---|
| committer | wangchengcheng <[email protected]> | 2023-07-27 15:43:51 +0800 |
| commit | 124f687daace8b85e5c74abac04bcd0a92744a8d (patch) | |
| tree | 4f563326b1be67cfb51bf6a04f1ca4d953536e76 /MSH-PIC/clickhouse | |
| parent | 08686ae87f9efe7a590f48db74ed133b481c85b1 (diff) | |
P19 23.07 online-configP19
Diffstat (limited to 'MSH-PIC/clickhouse')
| -rw-r--r-- | MSH-PIC/clickhouse/clickhouse-server/clickhouse-server | 355 | ||||
| -rw-r--r-- | MSH-PIC/clickhouse/clickhouse-server/clickhouse-server.pid | 1 | ||||
| -rw-r--r-- | MSH-PIC/clickhouse/clickhouse-server/config.xml | 403 | ||||
| -rw-r--r-- | MSH-PIC/clickhouse/clickhouse-server/metrika.xml | 55 | ||||
| -rw-r--r-- | MSH-PIC/clickhouse/clickhouse-server/users.xml | 214 |
5 files changed, 1028 insertions, 0 deletions
diff --git a/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server b/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server new file mode 100644 index 0000000..7fe5acd --- /dev/null +++ b/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server @@ -0,0 +1,355 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: clickhouse-server +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Required-Start: +# Required-Stop: +# Short-Description: Yandex clickhouse-server daemon +### END INIT INFO + + +CLICKHOUSE_USER=clickhouse +CLICKHOUSE_GROUP=${CLICKHOUSE_USER} +SHELL=/bin/bash +PROGRAM=clickhouse-server +GENERIC_PROGRAM=clickhouse +EXTRACT_FROM_CONFIG=${GENERIC_PROGRAM}-extract-from-config +SYSCONFDIR=/data/tsg/olap/clickhouse/$PROGRAM +CLICKHOUSE_LOGDIR=/data/tsg/olap/clickhouse/clickhouse-server +CLICKHOUSE_LOGDIR_USER=root +CLICKHOUSE_DATADIR_OLD=/data/tsg/olap/clickhouse/clickhouse_old +LOCALSTATEDIR=/var/lock +BINDIR=/usr/bin +CLICKHOUSE_CRONFILE=/etc/cron.d/clickhouse-server +CLICKHOUSE_CONFIG=/data/tsg/olap/clickhouse/clickhouse-server/config.xml +LOCKFILE=$LOCALSTATEDIR/$PROGRAM +RETVAL=0 +CLICKHOUSE_PIDDIR=/data/tsg/olap/clickhouse/$PROGRAM +CLICKHOUSE_PIDFILE="$CLICKHOUSE_PIDDIR/$PROGRAM.pid" + +# Some systems lack "flock" +command -v flock >/dev/null && FLOCK=flock + + +# Override defaults from optional config file +test -f /etc/default/clickhouse && . /etc/default/clickhouse + +# On x86_64, check for required instruction set. +if uname -mpi | grep -q 'x86_64'; then + if ! grep -q 'sse4_2' /proc/cpuinfo; then + # On KVM, cpuinfo could falsely not report SSE 4.2 support, so skip the check. + if ! grep -q 'Common KVM processor' /proc/cpuinfo; then + + # Some other VMs also report wrong flags in cpuinfo. + # Tricky way to test for instruction set: + # create temporary binary and run it; + # if it get caught illegal instruction signal, + # then required instruction set is not supported really. 
+ # + # Generated this way: + # gcc -xc -Os -static -nostdlib - <<< 'void _start() { __asm__("pcmpgtq %%xmm0, %%xmm1; mov $0x3c, %%rax; xor %%rdi, %%rdi; syscall":::"memory"); }' && strip -R .note.gnu.build-id -R .comment -R .eh_frame -s ./a.out && gzip -c -9 ./a.out | base64 -w0; echo + + if ! (echo -n 'H4sICAwAW1cCA2Eub3V0AKt39XFjYmRkgAEmBjsGEI+H0QHMd4CKGyCUAMUsGJiBJDNQNUiYlQEZOKDQclB9cnD9CmCSBYqJBRxQOvBpSQobGfqIAWn8FuYnPI4fsAGyPQz/87MeZtArziguKSpJTGLQK0mtKGGgGHADMSgoYH6AhTMPNHyE0NQzYuEzYzEXFr6CBPQDANAsXKTwAQAA' | base64 -d | gzip -d > /tmp/clickhouse_test_sse42 && chmod a+x /tmp/clickhouse_test_sse42 && /tmp/clickhouse_test_sse42); then + echo 'Warning! SSE 4.2 instruction set is not supported' + #exit 3 + fi + fi + fi +fi + + +SUPPORTED_COMMANDS="{start|stop|status|restart|forcestop|forcerestart|reload|condstart|condstop|condrestart|condreload|initdb}" +is_supported_command() +{ + echo "$SUPPORTED_COMMANDS" | grep -E "(\{|\|)$1(\||})" &> /dev/null +} + + +is_running() +{ + [ -r "$CLICKHOUSE_PIDFILE" ] && pgrep -s $(cat "$CLICKHOUSE_PIDFILE") 1> /dev/null 2> /dev/null +} + + +wait_for_done() +{ + while is_running; do + sleep 1 + done +} + + +die() +{ + echo $1 >&2 + exit 1 +} + + +# Check that configuration file is Ok. +check_config() +{ + if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then + su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path" >/dev/null || die "Configuration file ${CLICKHOUSE_CONFIG} doesn't parse successfully. Won't restart server. You may use forcerestart if you are sure."; + fi +} + + +initdb() +{ + if [ -d ${SYSCONFDIR} ]; then + su -s /bin/sh ${CLICKHOUSE_USER} -c "test -w ${SYSCONFDIR}" || chown ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${SYSCONFDIR} + fi + + if [ -x "$BINDIR/$EXTRACT_FROM_CONFIG" ]; then + CLICKHOUSE_DATADIR_FROM_CONFIG=$(su -s $SHELL ${CLICKHOUSE_USER} -c "$BINDIR/$EXTRACT_FROM_CONFIG --config-file=\"$CLICKHOUSE_CONFIG\" --key=path") + if [ "(" "$?" 
-ne "0" ")" -o "(" -z "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ")" ]; then + die "Cannot obtain value of path from config file: ${CLICKHOUSE_CONFIG}"; + fi + echo "Path to data directory in ${CLICKHOUSE_CONFIG}: ${CLICKHOUSE_DATADIR_FROM_CONFIG}" + else + CLICKHOUSE_DATADIR_FROM_CONFIG="/var/lib/clickhouse" + fi + + if ! getent group ${CLICKHOUSE_USER} >/dev/null; then + echo "Can't chown to non-existing user ${CLICKHOUSE_USER}" + return + fi + if ! getent passwd ${CLICKHOUSE_GROUP} >/dev/null; then + echo "Can't chown to non-existing group ${CLICKHOUSE_GROUP}" + return + fi + + if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -r ${CLICKHOUSE_CONFIG}"); then + echo "Warning! clickhouse config [${CLICKHOUSE_CONFIG}] not readable by user [${CLICKHOUSE_USER}]" + fi + + if ! $(su -s $SHELL ${CLICKHOUSE_USER} -c "test -O \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\" && test -G \"${CLICKHOUSE_DATADIR_FROM_CONFIG}\""); then + if [ $(dirname "${CLICKHOUSE_DATADIR_FROM_CONFIG}") == "/" ]; then + echo "Directory ${CLICKHOUSE_DATADIR_FROM_CONFIG} seems too dangerous to chown." + else + if [ ! -e "${CLICKHOUSE_DATADIR_FROM_CONFIG}" ]; then + echo "Creating directory ${CLICKHOUSE_DATADIR_FROM_CONFIG}" + mkdir -p "${CLICKHOUSE_DATADIR_FROM_CONFIG}" + fi + + echo "Changing owner of [${CLICKHOUSE_DATADIR_FROM_CONFIG}] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]" + chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} "${CLICKHOUSE_DATADIR_FROM_CONFIG}" + fi + fi + + if ! 
$(su -s $SHELL ${CLICKHOUSE_USER} -c "test -w ${CLICKHOUSE_LOGDIR}"); then + echo "Changing owner of [${CLICKHOUSE_LOGDIR}/*] to [${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP}]" + chown -R ${CLICKHOUSE_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR}/* + echo "Changing owner of [${CLICKHOUSE_LOGDIR}] to [${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP}]" + chown ${CLICKHOUSE_LOGDIR_USER}:${CLICKHOUSE_GROUP} ${CLICKHOUSE_LOGDIR} + fi +} + + +start() +{ + [ -x $BINDIR/$PROGRAM ] || exit 0 + local EXIT_STATUS + EXIT_STATUS=0 + + echo -n "Start $PROGRAM service: " + + if is_running; then + echo -n "already running " + EXIT_STATUS=1 + else + ulimit -n 262144 + mkdir -p $CLICKHOUSE_PIDDIR + chown -R $CLICKHOUSE_USER:$CLICKHOUSE_GROUP $CLICKHOUSE_PIDDIR + initdb + if ! is_running; then + # Lock should not be held while running child process, so we release the lock. Note: obviously, there is race condition. + # But clickhouse-server has protection from simultaneous runs with same data directory. + su -s $SHELL ${CLICKHOUSE_USER} -c "$FLOCK -u 9; exec -a \"$PROGRAM\" \"$BINDIR/$PROGRAM\" --daemon --pid-file=\"$CLICKHOUSE_PIDFILE\" --config-file=\"$CLICKHOUSE_CONFIG\"" + EXIT_STATUS=$? + if [ $EXIT_STATUS -ne 0 ]; then + break + fi + fi + fi + + if [ $EXIT_STATUS -eq 0 ]; then + echo "DONE" + else + echo "FAILED" + fi + + return $EXIT_STATUS +} + + +stop() +{ + local EXIT_STATUS + EXIT_STATUS=0 + + if [ -f $CLICKHOUSE_PIDFILE ]; then + + echo -n "Stop $PROGRAM service: " + + kill -TERM $(cat "$CLICKHOUSE_PIDFILE") + + wait_for_done + + echo "DONE" + fi + return $EXIT_STATUS +} + + +restart() +{ + check_config + stop + start +} + + +forcestop() +{ + local EXIT_STATUS + EXIT_STATUS=0 + + echo -n "Stop forcefully $PROGRAM service: " + + kill -KILL $(cat "$CLICKHOUSE_PIDFILE") + + wait_for_done + + echo "DONE" + return $EXIT_STATUS +} + + +forcerestart() +{ + forcestop + start +} + +use_cron() +{ + # 1. 
running systemd + if [ -x "/bin/systemctl" ] && [ -f /etc/systemd/system/clickhouse-server.service ] && [ -d /run/systemd/system ]; then + return 1 + fi + # 2. disabled by config + if [ -z "$CLICKHOUSE_CRONFILE" ]; then + return 2 + fi + return 0 +} + +enable_cron() +{ + use_cron && sed -i 's/^#*//' "$CLICKHOUSE_CRONFILE" +} + + +disable_cron() +{ + use_cron && sed -i 's/^#*/#/' "$CLICKHOUSE_CRONFILE" +} + + +is_cron_disabled() +{ + use_cron || return 0 + + # Assumes that either no lines are commented or all lines are commented. + # Also please note, that currently cron file for ClickHouse has only one line (but some time ago there was more). + grep -q -E '^#' "$CLICKHOUSE_CRONFILE"; +} + + +main() +{ + # See how we were called. + EXIT_STATUS=0 + case "$1" in + start) + start && enable_cron + ;; + stop) + disable_cron && stop + ;; + restart) + restart && enable_cron + ;; + forcestop) + disable_cron && forcestop + ;; + forcerestart) + forcerestart && enable_cron + ;; + reload) + restart + ;; + condstart) + is_running || start + ;; + condstop) + is_running && stop + ;; + condrestart) + is_running && restart + ;; + condreload) + is_running && restart + ;; + initdb) + initdb + ;; + enable_cron) + enable_cron + ;; + disable_cron) + disable_cron + ;; + *) + echo "Usage: $0 $SUPPORTED_COMMANDS" + exit 2 + ;; + esac + + exit $EXIT_STATUS +} + + +status() +{ + if is_running; then + echo "$PROGRAM service is running" + else + if is_cron_disabled; then + echo "$PROGRAM service is stopped"; + else + echo "$PROGRAM: process unexpectedly terminated" + fi + fi +} + + +# Running commands without need of locking +case "$1" in +status) + status + exit 0 + ;; +esac + + +( + if $FLOCK -n 9; then + main "$@" + else + echo "Init script is already running" && exit 1 + fi +) 9> $LOCKFILE diff --git a/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server.pid b/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server.pid new file mode 100644 index 0000000..8d9f099 --- /dev/null +++ 
b/MSH-PIC/clickhouse/clickhouse-server/clickhouse-server.pid @@ -0,0 +1 @@ +56515
\ No newline at end of file diff --git a/MSH-PIC/clickhouse/clickhouse-server/config.xml b/MSH-PIC/clickhouse/clickhouse-server/config.xml new file mode 100644 index 0000000..de391a7 --- /dev/null +++ b/MSH-PIC/clickhouse/clickhouse-server/config.xml @@ -0,0 +1,403 @@ +<?xml version="1.0"?> +<yandex> + <logger> + <!-- Possible levels: https://github.com/pocoproject/poco/blob/develop/Foundation/include/Poco/Logger.h#L105 --> + <level>error</level> + <log>/data/tsg/olap/clickhouse/logs/clickhouse-server.log</log> + <errorlog>/data/tsg/olap/clickhouse/logs/clickhouse-server.err.log</errorlog> + <size>200M</size> + <count>10</count> + <!-- <console>1</console> --> <!-- Default behavior is autodetection (log to console if not daemon mode and is tty) --> + </logger> + <!--display_name>production</display_name--> <!-- It is the name that will be shown in the client --> + <http_port>8123</http_port> + <tcp_port>9001</tcp_port> + <max_server_memory_usage>150000000000</max_server_memory_usage> + + <!-- For HTTPS and SSL over native protocol. --> + <!-- + <https_port>8443</https_port> + <tcp_port_secure>9440</tcp_port_secure> + --> + + <!-- Used with https_port and tcp_port_secure. 
Full ssl options list: https://github.com/ClickHouse-Extras/poco/blob/master/NetSSL_OpenSSL/include/Poco/Net/SSLManager.h#L71 --> + <openSSL> + <server> <!-- Used for https server AND secure tcp port --> + <!-- openssl req -subj "/CN=localhost" -new -newkey rsa:2048 -days 365 -nodes -x509 -keyout /etc/clickhouse-server/server.key -out /etc/clickhouse-server/server.crt --> + <certificateFile>/data/tsg/olap/clickhouse/clickhouse-server/server.crt</certificateFile> + <privateKeyFile>/data/tsg/olap/clickhouse/clickhouse-server/server.key</privateKeyFile> + <!-- openssl dhparam -out /etc/clickhouse-server/dhparam.pem 4096 --> + <dhParamsFile>/data/tsg/olap/clickhouse/clickhouse-server/dhparam.pem</dhParamsFile> + <verificationMode>none</verificationMode> + <loadDefaultCAFile>true</loadDefaultCAFile> + <cacheSessions>true</cacheSessions> + <disableProtocols>sslv2,sslv3</disableProtocols> + <preferServerCiphers>true</preferServerCiphers> + </server> + + <client> <!-- Used for connecting to https dictionary source --> + <loadDefaultCAFile>true</loadDefaultCAFile> + <cacheSessions>true</cacheSessions> + <disableProtocols>sslv2,sslv3</disableProtocols> + <preferServerCiphers>true</preferServerCiphers> + <!-- Use for self-signed: <verificationMode>none</verificationMode> --> + <invalidCertificateHandler> + <!-- Use for self-signed: <name>AcceptCertificateHandler</name> --> + <name>RejectCertificateHandler</name> + </invalidCertificateHandler> + </client> + </openSSL> + + <!-- Default root page on http[s] server. For example load UI from https://tabix.io/ when opening http://localhost:8123 --> + <!-- + <http_server_default_response><![CDATA[<html ng-app="SMI2"><head><base href="http://ui.tabix.io/"></head><body><div ui-view="" class="content-ui"></div><script src="http://loader.tabix.io/master.js"></script></body></html>]]></http_server_default_response> + --> + + <!-- Port for communication between replicas. Used for data exchange. 
--> + <interserver_http_port>9009</interserver_http_port> + + <!-- Hostname that is used by other replicas to request this server. + If not specified, than it is determined analoguous to 'hostname -f' command. + This setting could be used to switch replication to another network interface. + --> + + <interserver_http_host>192.168.20.193</interserver_http_host> + + + <!-- Listen specified host. use :: (wildcard IPv6 address), if you want to accept connections both with IPv4 and IPv6 from everywhere. --> + <listen_host>0.0.0.0</listen_host> + <!-- Same for hosts with disabled ipv6: --> + <!--<listen_host>0.0.0.0</listen_host>--> + + <!-- Default values - try listen localhost on ipv4 and ipv6: --> + +<!--<listen_host>::1</listen_host>--> + <!-- <listen_host>127.0.0.1</listen_host>--> + + <!-- Don't exit if ipv6 or ipv4 unavailable, but listen_host with this protocol specified --> + <!-- <listen_try>0</listen_try>--> + + <!-- Allow listen on same address:port --> + <!-- <listen_reuse_port>0</listen_reuse_port>--> + + <listen_backlog>64</listen_backlog> + + <max_connections>4096</max_connections> + <keep_alive_timeout>600</keep_alive_timeout> + + <!-- Maximum number of concurrent queries. --> + <!-- 21.12version 150 change to 500. --> + <max_concurrent_queries>500</max_concurrent_queries> + + <!-- Set limit on number of open files (default: maximum). This setting makes sense on Mac OS X because getrlimit() fails to retrieve + correct maximum value. --> + <!-- <max_open_files>262144</max_open_files> --> + + <!-- Size of cache of uncompressed blocks of data, used in tables of MergeTree family. + In bytes. Cache is single for server. Memory is allocated only on demand. + Cache is used when 'use_uncompressed_cache' user setting turned on (off by default). + Uncompressed cache is advantageous only for very short queries and in rare cases. 
+ --> + <uncompressed_cache_size>8589934592</uncompressed_cache_size> + + <!-- Approximate size of mark cache, used in tables of MergeTree family. + In bytes. Cache is single for server. Memory is allocated only on demand. + You should not lower this value. + --> + <mark_cache_size>5368709120</mark_cache_size> + + + <!-- Path to data directory, with trailing slash. --> +<!-- <path>/data/tsg/olap/clickhouse/</path> --> + <path>/data/tsg/olap/clickhouse/</path> + + <!-- Path to temporary data for processing hard queries. --> +<!-- <tmp_path>/data/tsg/olap/clickhouse/tmp/</tmp_path>--> + <tmp_path>/data/tsg/olap/clickhouse/tmp/</tmp_path> + + <!-- Directory with user provided files that are accessible by 'file' table function. --> + <user_files_path>/data/tsg/olap/clickhouse/user_files/</user_files_path> + + <!-- Path to configuration file with users, access rights, profiles of settings, quotas. --> + <users_config>users.xml</users_config> + + <!-- Default profile of settings. --> + <default_profile>default</default_profile> + + <!-- System profile of settings. This settings are used by internal processes (Buffer storage, Distibuted DDL worker and so on). --> + <!-- <system_profile>default</system_profile> --> + + <!-- Default database. --> + <default_database>default</default_database> + + <!-- Server time zone could be set here. + + Time zone is used when converting between String and DateTime types, + when printing DateTime in text formats and parsing DateTime from text, + it is used in date and time related functions, if specific time zone was not passed as an argument. + + Time zone is specified as identifier from IANA time zone database, like UTC or Africa/Abidjan. + If not specified, system time zone at server startup is used. + + Please note, that server could display time zone alias instead of specified name. + Example: W-SU is an alias for Europe/Moscow and Zulu is an alias for UTC. 
+ --> + <!-- <timezone>Europe/Moscow</timezone> --> + <timezone>UTC</timezone> + <!-- You can specify umask here (see "man umask"). Server will apply it on startup. + Number is always parsed as octal. Default umask is 027 (other users cannot read logs, data files, etc; group can only read). + --> + <!-- <umask>022</umask> --> + + <!-- Configuration of clusters that could be used in Distributed tables. + https://clickhouse.yandex/docs/en/table_engines/distributed/ + --> + <remote_servers incl="clickhouse_remote_servers" > + <!-- Test only shard config for testing distributed storage + <test_shard_localhost> + <shard> + <replica> + <host>localhost</host> + <port>9000</port> + </replica> + </shard> + </test_shard_localhost> + <test_shard_localhost_secure> + <shard> + <replica> + <host>localhost</host> + <port>9440</port> + <secure>1</secure> + </replica> + </shard> + </test_shard_localhost_secure>--> + </remote_servers> + + + <!-- If element has 'incl' attribute, then for it's value will be used corresponding substitution from another file. + By default, path to file with substitutions is /etc/metrika.xml. It could be changed in config in 'include_from' element. + Values for substitutions are specified in /yandex:wq +/name_of_substitution elements in that file. + --> + + <!-- ZooKeeper is used to store metadata about replicas, when using Replicated tables. + Optional. If you don't use replicated tables, you could omit that. + + See https://clickhouse.yandex/docs/en/table_engines/replication/ + --> + <zookeeper incl="zookeeper-servers" optional="true" /> + + <!-- Substitutions for parameters of replicated tables. + Optional. If you don't use replicated tables, you could omit that. + + See https://clickhouse.yandex/docs/en/table_engines/replication/#creating-replicated-tables + --> + <macros incl="macros" optional="true" /> + + + <!-- Reloading interval for embedded dictionaries, in seconds. Default: 3600. 
--> + <builtin_dictionaries_reload_interval>3600</builtin_dictionaries_reload_interval> + + + <!-- Maximum session timeout, in seconds. Default: 3600. --> + <max_session_timeout>21600</max_session_timeout> + + <!-- Default session timeout, in seconds. Default: 60. --> + <default_session_timeout>6000</default_session_timeout> +<max_table_size_to_drop>0</max_table_size_to_drop> +<max_partition_size_to_drop>0</max_partition_size_to_drop> +<include_from>/data/tsg/olap/clickhouse/clickhouse-server/metrika.xml</include_from> + <!-- Sending data to Graphite for monitoring. Several sections can be defined. --> + <!-- + interval - send every X second + root_path - prefix for keys + hostname_in_path - append hostname to root_path (default = true) + metrics - send data from table system.metrics + events - send data from table system.events + asynchronous_metrics - send data from table system.asynchronous_metrics + --> + <!-- + <graphite> + <host>localhost</host> + <port>42000</port> + <timeout>0.1</timeout> + <interval>60</interval> + <root_path>one_min</root_path> + <hostname_in_path>true</hostname_in_path> + + <metrics>true</metrics> + <events>true</events> + <asynchronous_metrics>true</asynchronous_metrics> + </graphite> + <graphite> + <host>localhost</host> + <port>42000</port> + <timeout>0.1</timeout> + <interval>1</interval> + <root_path>one_sec</root_path> + + <metrics>true</metrics> + <events>true</events> + <asynchronous_metrics>false</asynchronous_metrics> + </graphite> + --> + + + <!-- Query log. Used only for queries with setting log_queries = 1. --> + <query_log> + <!-- What table to insert data. If table is not exist, it will be created. + When query log structure is changed after system update, + then old table will be renamed and new table will be created automatically. 
+ --> + <database>system</database> + <table>query_log</table> + <!-- + PARTITION BY expr https://clickhouse.yandex/docs/en/table_engines/custom_partitioning_key/ + Example: + event_date + toMonday(event_date) + toYYYYMM(event_date) + toStartOfHour(event_time) + --> + <partition_by>toYYYYMM(event_date)</partition_by> + <!-- Interval of flushing data. --> + <flush_interval_milliseconds>7500</flush_interval_milliseconds> + </query_log> + + + <!-- Uncomment if use part_log + <part_log> + <database>system</database> + <table>part_log</table> + + <flush_interval_milliseconds>7500</flush_interval_milliseconds> + </part_log> + --> + + + <!-- Parameters for embedded dictionaries, used in Yandex.Metrica. + See https://clickhouse.yandex/docs/en/dicts/internal_dicts/ + --> + + <!-- Path to file with region hierarchy. --> + <!-- <path_to_regions_hierarchy_file>/opt/geo/regions_hierarchy.txt</path_to_regions_hierarchy_file> --> + + <!-- Path to directory with files containing names of regions --> + <!-- <path_to_regions_names_files>/opt/geo/</path_to_regions_names_files> --> + + + <!-- Configuration of external dictionaries. See: + https://clickhouse.yandex/docs/en/dicts/external_dicts/ + --> + <dictionaries_config>*_dictionary.xml</dictionaries_config> + + <!-- Uncomment if you want data to be compressed 30-100% better. + Don't do that if you just started using ClickHouse. + --> + <compression incl="clickhouse_compression"> + <!-- + <!- - Set of variants. Checked in order. Last matching case wins. If nothing matches, lz4 will be used. - -> + <case> + + <!- - Conditions. All must be satisfied. Some conditions may be omitted. - -> + <min_part_size>10000000000</min_part_size> <!- - Min part size in bytes. - -> + <min_part_size_ratio>0.01</min_part_size_ratio> <!- - Min size of part relative to whole table size. - -> + + <!- - What compression method to use. 
- -> + <method>zstd</method> + </case> + --> + </compression> + + <!-- Allow to execute distributed DDL queries (CREATE, DROP, ALTER, RENAME) on cluster. + Works only if ZooKeeper is enabled. Comment it if such functionality isn't required. --> + <distributed_ddl> + <!-- Path in ZooKeeper to queue with DDL queries --> + <path>/clickhouse/task_queue/ddl</path> + + <!-- Settings from this profile will be used to execute DDL queries --> + <!-- <profile>default</profile> --> + </distributed_ddl> + + <!-- Settings to fine tune MergeTree tables. See documentation in source code, in MergeTreeSettings.h --> + <merge_tree> + <max_bytes_to_merge_at_max_space_in_pool>60000000000</max_bytes_to_merge_at_max_space_in_pool> + <ttl_only_drop_parts>1</ttl_only_drop_parts> + <min_merge_bytes_to_use_direct_io>0</min_merge_bytes_to_use_direct_io> + <max_suspicious_broken_parts>100</max_suspicious_broken_parts> + </merge_tree> + + + <!-- Protection from accidental DROP. + If size of a MergeTree table is greater than max_table_size_to_drop (in bytes) than table could not be dropped with any DROP query. + If you want do delete one table and don't want to restart clickhouse-server, you could create special file <clickhouse-path>/flags/force_drop_table and make DROP once. + By default max_table_size_to_drop is 50GB; max_table_size_to_drop=0 allows to DROP any tables. + The same for max_partition_size_to_drop. + Uncomment to disable protection. 
+ --> + <!-- <max_table_size_to_drop>0</max_table_size_to_drop> --> + <!-- <max_partition_size_to_drop>0</max_partition_size_to_drop> --> + + <!-- Example of parameters for GraphiteMergeTree table engine --> + <graphite_rollup_example> + <pattern> + <regexp>click_cost</regexp> + <function>any</function> + <retention> + <age>0</age> + <precision>3600</precision> + </retention> + <retention> + <age>86400</age> + <precision>60</precision> + </retention> + </pattern> + <default> + <function>max</function> + <retention> + <age>0</age> + <precision>60</precision> + </retention> + <retention> + <age>3600</age> + <precision>300</precision> + </retention> + <retention> + <age>86400</age> + <precision>3600</precision> + </retention> + </default> + </graphite_rollup_example> + + <!-- Directory in <clickhouse-path> containing schema files for various input formats. + The directory will be created if it doesn't exist. + --> + <format_schema_path>/data/tsg/olap/clickhouse/format_schemas/</format_schema_path> + + <!-- + <storage_configuration> + <disks> + <ssd> + <path>if you want wo use this policies, please config the ssd mount path</path> + </ssd> + </disks> + + <policies> + <ssd_to_hdd> + <volumes> + <hot> + <disk>ssd</disk> + </hot> + <default> + <disk>default</disk> + </default> + </volumes> + <move_factor>0.1</move_factor> + </ssd_to_hdd> + </policies> + </storage_configuration> + --> + + <!-- Uncomment to disable ClickHouse internal DNS caching. --> + <!-- <disable_internal_dns_cache>1</disable_internal_dns_cache> --> +</yandex> diff --git a/MSH-PIC/clickhouse/clickhouse-server/metrika.xml b/MSH-PIC/clickhouse/clickhouse-server/metrika.xml new file mode 100644 index 0000000..f5faeb6 --- /dev/null +++ b/MSH-PIC/clickhouse/clickhouse-server/metrika.xml @@ -0,0 +1,55 @@ +<yandex> +<!--ck集群节点--> +<clickhouse_remote_servers> + +<ck_cluster> + <shard> + <!-- Optional. Shard weight when writing data. Default: 1. --> + <weight>1</weight> + <!-- Optional. 
Whether to write data to just one of the replicas. Default: false (write data to all replicas). --> + <internal_replication>false</internal_replication> + <replica> + <host>192.168.20.193</host> + <port>9001</port> + <user>default</user> + <password>galaxy2019</password> + </replica> + </shard> +</ck_cluster> + +</clickhouse_remote_servers> +<zookeeper-servers> +<node index="1"> +<host>192.168.20.221</host> +<port>2181</port> +</node> + +<node index="2"> +<host>192.168.20.222</host> +<port>2181</port> +</node> + +<node index="3"> +<host>192.168.20.223</host> +<port>2181</port> +</node> + +<session_timeout_ms>120000</session_timeout_ms> +</zookeeper-servers> + +<networks> +<ip>::/0</ip> +</networks> + +<!--压缩相关配置--> +<clickhouse_compression> +<case> +<min_part_size>10000000000</min_part_size> +<min_part_size_ratio>0.01</min_part_size_ratio> +<method>lz4</method> <!--压缩算法lz4压缩比zstd快, 更占磁盘--> +</case> +</clickhouse_compression> +</yandex> + + + diff --git a/MSH-PIC/clickhouse/clickhouse-server/users.xml b/MSH-PIC/clickhouse/clickhouse-server/users.xml new file mode 100644 index 0000000..990135b --- /dev/null +++ b/MSH-PIC/clickhouse/clickhouse-server/users.xml @@ -0,0 +1,214 @@ +<?xml version="1.0"?> +<yandex> + <!-- Profiles of settings. --> + <profiles> + <!-- Default settings. --> + <default> + <!-- Maximum memory usage for processing single query, in bytes. 
--> + <max_memory_usage>150000000000</max_memory_usage> + <!-- <max_memory_usage_for_all_queries>200000000000</max_memory_usage_for_all_queries> --> + <default_database_engine>Ordinary</default_database_engine> + <optimize_on_insert>0</optimize_on_insert> + <async_socket_for_remote>0</async_socket_for_remote> + <distributed_ddl_task_timeout>0</distributed_ddl_task_timeout> + <max_bytes_before_external_group_by>75000000000</max_bytes_before_external_group_by> + <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient> + <distributed_product_mode>local</distributed_product_mode> + <log_queries>1</log_queries> + <cancel_http_readonly_queries_on_client_close>1</cancel_http_readonly_queries_on_client_close> + <background_pool_size>16</background_pool_size> + <!-- <enable_http_compression>1</enable_http_compression>--> + <replication_alter_columns_timeout>60</replication_alter_columns_timeout> + <skip_unavailable_shards>1</skip_unavailable_shards> + <max_execution_time>21600</max_execution_time> + <!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. --> + <use_uncompressed_cache>1</use_uncompressed_cache> + <replace_running_query>1</replace_running_query> + <http_receive_timeout>21600</http_receive_timeout> + <http_send_timeout>21600</http_send_timeout> + <receive_timeout>21600</receive_timeout> + <send_timeout>21600</send_timeout> + <count_distinct_implementation>uniqCombined</count_distinct_implementation> + <!-- How to choose between replicas during distributed query processing. + random - choose random replica from set of replicas with minimum number of errors + nearest_hostname - from set of replicas with minimum number of errors, choose replica + with minumum number of different symbols between replica's hostname and local hostname + (Hamming distance). + in_order - first live replica is choosen in specified order. 
+ --> + <max_rows_to_group_by>10000000</max_rows_to_group_by> + <group_by_overflow_mode>any</group_by_overflow_mode> + <timeout_before_checking_execution_speed>3600</timeout_before_checking_execution_speed> + <load_balancing>in_order</load_balancing> + </default> + + <!-- Profile that allows only read queries. --> + <readonly> + <max_memory_usage>150000000000</max_memory_usage> + <!-- <max_memory_usage_for_all_queries>200000000000</max_memory_usage_for_all_queries> --> + <default_database_engine>Ordinary</default_database_engine> + <optimize_on_insert>0</optimize_on_insert> + <async_socket_for_remote>0</async_socket_for_remote> + <distributed_ddl_task_timeout>0</distributed_ddl_task_timeout> + <distributed_product_mode>local</distributed_product_mode> + <http_receive_timeout>600</http_receive_timeout> + <http_send_timeout>600</http_send_timeout> + <receive_timeout>600</receive_timeout> + <send_timeout>600</send_timeout> + <log_queries>1</log_queries> + <cancel_http_readonly_queries_on_client_close>1</cancel_http_readonly_queries_on_client_close> + <background_pool_size>16</background_pool_size> + <!-- http压缩 不影响http请求,只影响使用chproxy的客户端--> + <enable_http_compression>1</enable_http_compression> + <replace_running_query>1</replace_running_query> + <replication_alter_columns_timeout>60</replication_alter_columns_timeout> + <skip_unavailable_shards>1</skip_unavailable_shards> + <max_execution_time>600</max_execution_time> + <!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. 
--> + <timeout_before_checking_execution_speed>600</timeout_before_checking_execution_speed> + <use_uncompressed_cache>1</use_uncompressed_cache> + <count_distinct_implementation>uniqCombined</count_distinct_implementation> + <load_balancing>in_order</load_balancing> + <distributed_aggregation_memory_efficient>1</distributed_aggregation_memory_efficient> + <max_rows_to_group_by>10000000</max_rows_to_group_by> + <group_by_overflow_mode>any</group_by_overflow_mode> + <readonly>2</readonly> + + </readonly> + + <ckinsert> + <max_memory_usage>150000000000</max_memory_usage> + <!-- <max_memory_usage_for_all_queries>200000000000</max_memory_usage_for_all_queries> --> + <default_database_engine>Ordinary</default_database_engine> + <optimize_on_insert>0</optimize_on_insert> + <async_socket_for_remote>0</async_socket_for_remote> + <distributed_ddl_task_timeout>0</distributed_ddl_task_timeout> + <distributed_product_mode>local</distributed_product_mode> + <log_queries>1</log_queries> + <background_pool_size>16</background_pool_size> + + <replication_alter_columns_timeout>60</replication_alter_columns_timeout> + <skip_unavailable_shards>1</skip_unavailable_shards> + <max_execution_time>300</max_execution_time> + <!-- Use cache of uncompressed blocks of data. Meaningful only for processing many of very short queries. --> + <use_uncompressed_cache>0</use_uncompressed_cache> + <timeout_before_checking_execution_speed>300</timeout_before_checking_execution_speed> + <http_receive_timeout>300</http_receive_timeout> + <http_send_timeout>300</http_send_timeout> + <receive_timeout>300</receive_timeout> + <send_timeout>300</send_timeout> + <allow_ddl>0</allow_ddl> + <load_balancing>random</load_balancing> + </ckinsert> + </profiles> + + <!-- Users and ACL. --> + <users> + <!-- If user name was not specified, 'default' user is used. --> + <default> + <!-- Password could be specified in plaintext or in SHA256 (in hex format). 
+ + If you want to specify the password in plaintext (not recommended), place it in the 'password' element. + Example: <password>qwerty</password>. + Password could be empty. + + If you want to specify SHA256, place it in the 'password_sha256_hex' element. + Example: <password_sha256_hex>65e84be33532fb784c48129675f9eff3a682b27168c0ea744b2cf58ee02337c5</password_sha256_hex> + + How to generate a decent password: + Execute: PASSWORD=$(base64 < /dev/urandom | head -c8); echo "$PASSWORD"; echo -n "$PASSWORD" | sha256sum | tr -d '-' + The first line of output is the password and the second is the corresponding SHA256. + --> + <password_sha256_hex>d24247a535fe6794275904f9b72e7fcf14a8a45628874d2eb1fd147020a403f7</password_sha256_hex> + + <!-- List of networks with open access. + + To open access from everywhere, specify: + <ip>::/0</ip> + + To open access only from localhost, specify: + <ip>::1</ip> + <ip>127.0.0.1</ip> + + Each element of the list has one of the following forms: + <ip> IP-address or network mask. Examples: 213.180.204.3 or 10.0.0.1/8 or 10.0.0.1/255.255.255.0 + 2a02:6b8::3 or 2a02:6b8::3/64 or 2a02:6b8::3/ffff:ffff:ffff:ffff::. + <host> Hostname. Example: server01.yandex.ru. + To check access, a DNS query is performed, and all received addresses are compared to the peer address. + <host_regexp> Regular expression for host names. Example: ^server\d\d-\d\d-\d\.yandex\.ru$ + To check access, a DNS PTR query is performed for the peer address and then the regexp is applied. + Then, for the result of the PTR query, another DNS query is performed and all received addresses are compared to the peer address. + It is strongly recommended that the regexp ends with $ + All results of DNS requests are cached until server restart. + --> + <networks incl="networks" replace="replace"> + <ip>::/0</ip> + </networks> + + <!-- Settings profile for user. --> + <profile>default</profile> + + <!-- Quota for user. 
--> + <quota>default</quota> + </default> + + <tsg_report> + <password_sha256_hex>d24247a535fe6794275904f9b72e7fcf14a8a45628874d2eb1fd147020a403f7</password_sha256_hex> + <networks incl="networks" replace="replace"> + <ip>::/0</ip> + </networks> + <profile>default</profile> + <quota>default</quota> + </tsg_report> + + + <tsg_insert> + <password_sha256_hex>d24247a535fe6794275904f9b72e7fcf14a8a45628874d2eb1fd147020a403f7</password_sha256_hex> + <networks incl="networks" replace="replace"> + <ip>::/0</ip> + </networks> + <profile>ckinsert</profile> + <quota>default</quota> + </tsg_insert> + + <!-- Example of user with readonly access. --> + <tsg_query> + <password_sha256_hex>bce24719d7fef9c9569e710a344bf24d4a1d6a8f19c9ec1f4c4b7884a9d31121</password_sha256_hex> + <networks incl="networks" replace="replace"> + <ip>::/0</ip> + </networks> + <profile>readonly</profile> + <quota>default</quota> + </tsg_query> + + + <!-- Example of user with readonly access. --> + <readonly> + <password></password> + <networks incl="networks" replace="replace"> + <ip>::1</ip> + <ip>127.0.0.1</ip> + </networks> + <profile>readonly</profile> + <quota>default</quota> + </readonly> + </users> + + <!-- Quotas. --> + <quotas> + <!-- Name of quota. --> + <default> + <!-- Limits for time interval. You could specify many intervals with different limits. --> + <interval> + <!-- Length of interval. --> + <duration>3600</duration> + <!-- No limits. Just calculate resource usage for time interval. --> + <queries>0</queries> + <errors>0</errors> + <result_rows>0</result_rows> + <read_rows>0</read_rows> + <execution_time>0</execution_time> + </interval> + </default> + </quotas> +</yandex> |
