Cassandra集群部署


主机信息

172.28.18.10 mongodb-test7

172.28.18.11 mongodb-test8

172.28.18.20 mongodb-test9

 

 操作系统参数配置

[root@test-mysql ~]# cat /etc/security/limits.d/90-nproc.conf

* soft nproc 32000

* hard nproc 32000

root soft nproc unlimited

* soft nofile 32000

* hard nofile 40960

 

ulimit的优化

 

cat /etc/security/limits.conf

*soft nofile 102400

*hard nofile 102400

*soft stack 1024

*hard stack 1024

 

 

内核的tcp优化

net.ipv4.tcp_syncookies=1

net.ipv4.tcp_tw_reuse=1

net.ipv4.tcp_tw_recycle=1

net.ipv4.tcp_synack_retries=2

net.ipv4.tcp_syn_retries=2

net.ipv4.tcp_wmem = 8192 436600 873200

net.ipv4.tcp_rmem = 32768 436600 873200

net.ipv4.tcp_mem = 94500000 91500000 92700000

net.ipv4.tcp_max_orphans=3276800

net.ipv4.tcp_fin_timeout=30

 

vm.swappiness=5

 

#直接生效

/sbin/sysctl -p

 

python必须安装2.7版本

$ tar xvf Python-2.7.11.tgz

mkdir /usr/local/python27

# cd Python-2.7.11

[root@mongodb-test7 Python-2.7.11]# ./configure --prefix=/usr/local/python27

make

make install

 

 

#[python]

export PY_HOME=/usr/local/python27

export PATH=$PY_HOME/bin:$PATH

 

export JAVA_HOME=/usr/local/jdk1.7.0_60

export PATH=$JAVA_HOME/bin:$PATH

export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

ulimit -S -c unlimited > /dev/null 2>&1

 

#[cassandra]

export MGO_HOME=/opt/freeware/apache-cassandra-2.2.4

export PATH=$MGO_HOME/bin:$PATH

 

创建用户目录

# mkdir -p /data/users

添加用户组

groupadd -g 355 cassandra

添加用户mgousr01

useradd -u 367 -g cassandra -G cassandra,users -d /data/users/cassandra01 -m cassandra01

修改cassandra用户密码

echo 'your_password' | passwd --stdin cassandra01

 

切换到mgousr01用户

创建如下目录

添加环境变量

# su - cassandra01

mkdir -p cassandra/bin

mkdir -p cassandra/dbdata

mkdir -p cassandra/etc

mkdir -p cassandra/logs

mkdir -p cassandra/commitlog

mkdir -p cassandra/saved_caches

 

添加环境变量

$ vim .bash_profile

#[cassandra]

export MGO_HOME=/opt/freeware/apache-cassandra-2.2.4

export PATH=$MGO_HOME/bin:$PATH

 

$ source .bash_profile

 

service iptables stop

chkconfig iptables off

 

# more /etc/hosts

172.28.18.10 mongodb-test7

172.28.18.11 mongodb-test8

172.28.18.20 mongodb-test9

 

vim /etc/profile

export JAVA_HOME=/opt/freeware/jdk1.7.0_71

export PATH=$JAVA_HOME/bin:$PATH

export CLASSPATH=.:$JAVA_HOME/lib/dt.jar:$JAVA_HOME/lib/tools.jar

 

mongodb-test7

编辑配置文件cassandra.yaml

# more cassandra.yaml

# Cassandra storage config YAML

 

# NOTE:

#   See http://wiki.apache.org/cassandra/StorageConfiguration for

#   fullexplanations of configuration directives

# /NOTE

 

# The name of the cluster. This is mainly used to preventmachines in

# one logical cluster from joining another.

cluster_name: 'Cluster01'

 

# This defines the number of tokens randomly assignedto this node on the ring

# The more tokens, relative to other nodes, the largerthe proportion of data

# that this node will store. You probably want allnodes to have the same number

# of tokens assuming they have equal hardwarecapability.

#

# If you leave this unspecified, Cassandra will usethe default of 1 token for legacy compatibility,

# and will use the initial_token as described below.

#

# Specifying initial_token will override this settingon the node's initial start,

# on subsequent starts, this setting will apply evenif initial token is set.

#

# If you already have a cluster with 1 token per node,and wish to migrate to

# multiple tokens per node, see http://wiki.apache.org/cassandra/Operations

num_tokens: 256

 

# initial_token allows you to specify tokensmanually.  While you can use # it with

# vnodes (num_tokens > 1, above) -- in which caseyou should provide a

# comma-separated list -- it's primarily used whenadding nodes # to legacy clusters

# that do not have vnodes enabled.

# initial_token:

 

# See http://wiki.apache.org/cassandra/HintedHandoff

# May either be "true" or "false"to enable globally, or contain a list

# of data centers to enable per-datacenter.

# hinted_handoff_enabled: DC1,DC2

hinted_handoff_enabled: true

# this defines the maximum amount of time a dead hostwill have hints

# generated. After it has been dead this long, new hints for it will not be

# created until it has been seen alive and gone downagain.

max_hint_window_in_ms: 10800000 # 3 hours

# Maximum throttle in KBs per second, per deliverythread.  This will be

# reduced proportionally to the number of nodes in thecluster.  (If there

# are two nodes in the cluster, each delivery threadwill use the maximum

# rate; if there are three, each will throttle to halfof the maximum,

# since we expect two nodes to be delivering hintssimultaneously.)

hinted_handoff_throttle_in_kb: 1024

# Number of threads with which to deliver hints;

# Consider increasing this number when you havemulti-dc deployments, since

# cross-dc handoff tends to be slower

max_hints_delivery_threads: 2

 

# Maximum throttle in KBs per second, total. This willbe

# reduced proportionally to the number of nodes in thecluster.

batchlog_replay_throttle_in_kb: 1024

 

# Authentication backend, implementing IAuthenticator;used to identify users

# Out of the box, Cassandra providesorg.apache.cassandra.auth.{AllowAllAuthenticator,

# PasswordAuthenticator}.

#

# - AllowAllAuthenticator performs no checks - set itto disable authentication.

# - PasswordAuthenticator relies on username/passwordpairs to authenticate

#   users. Itkeeps usernames and hashed passwords in system_auth.credentials table.

#   Pleaseincrease system_auth keyspace replication factor if you use this authenticator.

#   If usingPasswordAuthenticator, CassandraRoleManager must also be used (see below)

authenticator: AllowAllAuthenticator

 

# Authorization backend, implementing IAuthorizer;used to limit access/provide permissions

# Out of the box, Cassandra providesorg.apache.cassandra.auth.{AllowAllAuthorizer,

# CassandraAuthorizer}.

#

# - AllowAllAuthorizer allows any action to any user -set it to disable authorization.

# - CassandraAuthorizer stores permissions insystem_auth.permissions table. Please

#   increasesystem_auth keyspace replication factor if you use this authorizer.

authorizer: AllowAllAuthorizer

 

# Part of the Authentication & Authorizationbackend, implementing IRoleManager; used

# to maintain grants and memberships between roles.

# Out of the box, Cassandra providesorg.apache.cassandra.auth.CassandraRoleManager,

# which stores role information in the system_authkeyspace. Most functions of the

# IRoleManager require an authenticated login, sounless the configured IAuthenticator

# actually implements authentication, most of thisfunctionality will be unavailable.

#

# - CassandraRoleManager stores role data in thesystem_auth keyspace. Please

#   increasesystem_auth keyspace replication factor if you use this role manager.

role_manager: CassandraRoleManager

 

# Validity period for roles cache (fetchingpermissions can be an

# expensive operation depending on the authorizer).Granted roles are cached for

# authenticated sessions in AuthenticatedUser andafter the period specified

# here, become eligible for (async) reload.

# Defaults to 2000, set to 0 to disable.

# Will be disabled automatically forAllowAllAuthenticator.

roles_validity_in_ms: 2000

 

# Refresh interval for roles cache (if enabled).

# After this interval, cache entries become eligiblefor refresh. Upon next

# access, an async reload is scheduled and the oldvalue returned until it

# completes. If roles_validity_in_ms is non-zero, thenthis must be

# also.

# Defaults to the same value as roles_validity_in_ms.

# roles_update_interval_in_ms: 1000

 

# Validity period for permissions cache (fetchingpermissions can be an

# expensive operation depending on the authorizer,CassandraAuthorizer is

# one example). Defaults to 2000, set to 0 to disable.

# Will be disabled automatically forAllowAllAuthorizer.

permissions_validity_in_ms: 2000

 

# Refresh interval for permissions cache (if enabled).

# After this interval, cache entries become eligiblefor refresh. Upon next

# access, an async reload is scheduled and the oldvalue returned until it

# completes. If permissions_validity_in_ms isnon-zero, then this must be

# also.

# Defaults to the same value aspermissions_validity_in_ms.

# permissions_update_interval_in_ms: 1000

 

# The partitioner is responsible for distributinggroups of rows (by

# partition key) across nodes in the cluster.  You should leave this

# alone for new clusters.  The partitioner can NOT be changed without

# reloading all data, so when upgrading you should setthis to the

# same partitioner you were already using.

#

# Besides Murmur3Partitioner, partitioners includedfor backwards

# compatibility include RandomPartitioner,ByteOrderedPartitioner, and

# OrderPreservingPartitioner.

#

partitioner: org.apache.cassandra.dht.Murmur3Partitioner

 

# Directories where Cassandra should store data ondisk.  Cassandra

# will spread data evenly across them, subject to thegranularity of

# the configured compaction strategy.

# If not set, the default directory is$CASSANDRA_HOME/data/data.

data_file_directories:

#    - /var/lib/cassandra/data

     - /data/users/cassandra01/cassandra/dbdata

 

# commit log. when running on magnetic HDD, this should be a

# separate spindle than the data directories.

# If not set, the default directory is$CASSANDRA_HOME/data/commitlog.

# commitlog_directory: /var/lib/cassandra/commitlog

# commitlog_directory: /data/users/cassandra01/cassandra/commitlog

在 172.28.18.10 节点上要注释该行（不注释启动会报错）；在 172.28.18.11 和 172.28.18.20 节点上不要注释

# policy for data disk failures:

# die: shut down gossip and client transports and killthe JVM for any fs errors or

#     single-sstable errors, so the node can be replaced.

# stop_paranoid: shut down gossip and clienttransports even for single-sstable errors,

#               kill the JVM for errors during startup.

# stop: shut down gossip and client transports,leaving the node effectively dead, but

#       canstill be inspected via JMX, kill the JVM for errors during startup.

# best_effort: stop using the failed disk and respondto requests based on

#             remaining available sstables. This means you WILL see obsolete

#             data at CL.ONE!

# ignore: ignore fatal errors and let requests fail,as in pre-1.2 Cassandra

disk_failure_policy: stop

 

# policy for commit disk failures:

# die: shut down gossip and Thrift and kill the JVM,so the node can be replaced.

# stop: shut down gossip and Thrift, leaving the nodeeffectively dead, but

#       canstill be inspected via JMX.

# stop_commit: shutdown the commit log, letting writescollect but

#             continuing to service reads, as in pre-2.0.5 Cassandra

# ignore: ignore fatal errors and let the batches fail

commit_failure_policy: stop

 

# Maximum size of the key cache in memory.

#

# Each key cache hit saves 1 seek and each row cachehit saves 2 seeks at the

# minimum, sometimes more. The key cache is fairlytiny for the amount of

# time it saves, so it's worthwhile to use it at largenumbers.

# The row cache saves even more time, but must containthe entire row,

# so it is extremely space-intensive. It's best toonly use the

# row cache if you have hot rows or static rows.

#

# NOTE: if you reduce the size, you may not get youhottest keys loaded on startup.

#

# Default value is empty to make it "auto" (min(5% of Heap (in MB), 100MB)). Set to 0 to disable key cache.

key_cache_size_in_mb:

 

# Duration in seconds after which Cassandra should

# save the key cache. Caches are saved tosaved_caches_directory as

# specified in this configuration file.

#

# Saved caches greatly improve cold-start speeds, andis relatively cheap in

# terms of I/O for the key cache. Row cache saving ismuch more expensive and

# has limited use.

#

# Default is 14400 or 4 hours.

key_cache_save_period: 14400

 

# Number of keys from the key cache to save

# Disabled by default, meaning all keys are going tobe saved

# key_cache_keys_to_save: 100

 

# Row cache implementation class name.

# Available implementations:

#  org.apache.cassandra.cache.OHCProvider                Fully off-heap row cache implementation (default).
#  org.apache.cassandra.cache.SerializingCacheProvider   This is the row cache implementation available
#                                                        in previous releases of Cassandra.

# row_cache_class_name:org.apache.cassandra.cache.OHCProvider

 

# Maximum size of the row cache in memory.

# Please note that OHC cache implementation requiressome additional off-heap memory to manage

# the map structures and some in-flight memory duringoperations before/after cache entries can be

# accounted against the cache capacity. This overheadis usually small compared to the whole capacity.

# Do not specify more memory that the system canafford in the worst usual situation and leave some

# headroom for OS block level cache. Do never allowyour system to swap.

#

# Default value is 0, to disable row caching.

row_cache_size_in_mb: 0

 

# Duration in seconds after which Cassandra shouldsave the row cache.

# Caches are saved to saved_caches_directory asspecified in this configuration file.

#

# Saved caches greatly improve cold-start speeds, andis relatively cheap in

# terms of I/O for the key cache. Row cache saving ismuch more expensive and

# has limited use.

#

# Default is 0 to disable saving the row cache.

row_cache_save_period: 0

 

# Number of keys from the row cache to save.

# Specify 0 (which is the default), meaning all keysare going to be saved

# row_cache_keys_to_save: 100

 

# Maximum size of the counter cache in memory.

#

# Counter cache helps to reduce counter locks'contention for hot counter cells.

# In case of RF = 1 a counter cache hit will causeCassandra to skip the read before

# write entirely. With RF > 1 a counter cache hitwill still help to reduce the duration

# of the lock hold, helping with hot counter cellupdates, but will not allow skipping

# the read entirely. Only the local (clock, count)tuple of a counter cell is kept

# in memory, not the whole counter, so it's relativelycheap.

#

# NOTE: if you reduce the size, you may not get youhottest keys loaded on startup.

#

# Default value is empty to make it "auto" (min(2.5% of Heap (in MB), 50MB)). Set to 0 to disable counter cache.

# NOTE: if you perform counter deletes and rely on lowgcgs, you should disable the counter cache.

counter_cache_size_in_mb:

 

# Duration in seconds after which Cassandra should

# save the counter cache (keys only). Caches are savedto saved_caches_directory as

# specified in this configuration file.

#

# Default is 7200 or 2 hours.

counter_cache_save_period: 7200

 

# Number of keys from the counter cache to save

# Disabled by default, meaning all keys are going tobe saved

# counter_cache_keys_to_save: 100

 

# saved caches

# If not set, the default directory is$CASSANDRA_HOME/data/saved_caches.

# saved_caches_directory:/var/lib/cassandra/saved_caches

saved_caches_directory: /data/users/cassandra01/cassandra/saved_caches

 

# commitlog_sync may be either "periodic" or"batch."

#

# When in batch mode, Cassandra won't ack writes untilthe commit log

# has been fsynced to disk.  It will wait

# commitlog_sync_batch_window_in_ms millisecondsbetween fsyncs.

# This window should be kept short because the writerthreads will

# be unable to do extra work while waiting.  (You may need to increase

# concurrent_writes for the same reason.)

#

# commitlog_sync: batch

# commitlog_sync_batch_window_in_ms: 2

#

# the other option is "periodic" wherewrites may be acked immediately

# and the CommitLog is simply synced everycommitlog_sync_period_in_ms

# milliseconds.

commitlog_sync: periodic

commitlog_sync_period_in_ms: 10000

 

# The size of the individual commitlog filesegments.  A commitlog

# segment may be archived, deleted, or recycled onceall the data

# in it (potentially from each columnfamily in thesystem) has been

# flushed to sstables. 

#

# The default size is 32, which is almost always fine,but if you are

# archiving commitlog segments (seecommitlog_archiving.properties),

# then you probably want a finer granularity ofarchiving; 8 or 16 MB

# is reasonable.

commitlog_segment_size_in_mb: 32

 

# Compression to apply to the commit log. If omitted,the commit log

# will be written uncompressed.  LZ4, Snappy, and Deflate compressors

# are supported.

#commitlog_compression:

#   -class_name: LZ4Compressor

#    parameters:

#         -

 

# any class that implements the SeedProvider interfaceand has a

# constructor that takes a Mapof parameters will do.

seed_provider:

    # Addressesof hosts that are deemed contact points.

    # Cassandra nodes use this list of hosts tofind each other and learn

    # thetopology of the ring.  You must changethis if you are running

    # multiplenodes!

    - class_name: org.apache.cassandra.locator.SimpleSeedProvider

     parameters:

          #seeds is actually a comma-delimited list of addresses.

          # Ex:",,"

          - seeds: "mongodb-test8,mongodb-test9"  # 172.28.18.11 和 172.28.18.20 节点的主机名或 IP

 

# For workloads with more data than can fit in memory,Cassandra's

# bottleneck will be reads that need to fetch datafrom

# disk. "concurrent_reads" should be set to(16 * number_of_drives) in

# order to allow the operations to enqueue low enoughin the stack

# that the OS and drives can reorder them. Sameapplies to

# "concurrent_counter_writes", since counterwrites read the current

# values before incrementing and writing them back.

#

# On the other hand, since writes are almost never IObound, the ideal

# number of "concurrent_writes" is dependenton the number of cores in

# your system; (8 * number_of_cores) is a good rule ofthumb.

concurrent_reads: 32

concurrent_writes: 32

concurrent_counter_writes: 32

 

# Total memory to use for sstable-readingbuffers.  Defaults to

# the smaller of 1/4 of heap or 512MB.

# file_cache_size_in_mb: 512

 

# Total permitted memory to use for memtables.Cassandra will stop

# accepting writes when the limit is exceeded until aflush completes,

# and will trigger a flush based onmemtable_cleanup_threshold

# If omitted, Cassandra will set both to 1/4 the sizeof the heap.

# memtable_heap_space_in_mb: 2048

# memtable_offheap_space_in_mb: 2048

 

# Ratio of occupied non-flushing memtable size tototal permitted size

# that will trigger a flush of the largest memtable.Larger mct will

# mean larger flushes and hence less compaction, butalso less concurrent

# flush activity which can make it difficult to keepyour disks fed

# under heavy write load.

#

# memtable_cleanup_threshold defaults to 1 /(memtable_flush_writers + 1)

# memtable_cleanup_threshold: 0.11

 

# Specify the way Cassandra allocates and managesmemtable memory.

# Options are:

#  heap_buffers:    on heap niobuffers

#  offheap_buffers: off heap (direct) nio buffers

#  offheap_objects: native memory, eliminating nio buffer heap overhead

memtable_allocation_type: heap_buffers

 

# Total space to use for commit logs on disk.

#

# If space gets above this value, Cassandra will flushevery dirty CF

# in the oldest segment and remove it.  So a small total commitlog space

# will tend to cause more flush activity onless-active columnfamilies.

#

# The default value is the smaller of 8192, and 1/4 ofthe total space

# of the commitlog volume.

#

# commitlog_total_space_in_mb: 8192

 

# This sets the amount of memtable flush writerthreads.  These will

# be blocked by disk io, and each one will hold amemtable in memory

# while blocked.

#

# memtable_flush_writers defaults to the smaller of(number of disks,

# number of cores), with a minimum of 2 and a maximumof 8.

#

# If your data directories are backed by SSD, youshould increase this

# to the number of cores.

#memtable_flush_writers: 8

 

# A fixed memory pool size in MB for for SSTable indexsummaries. If left

# empty, this will default to 5% of the heap size. Ifthe memory usage of

# all index summaries exceeds this limit, SSTableswith low read rates will

# shrink their index summaries in order to meet thislimit.  However, this

# is a best-effort process. In extreme conditionsCassandra may need to use

# more than this amount of memory.

index_summary_capacity_in_mb:

 

# How frequently index summaries should beresampled.  This is done

# periodically to redistribute memory from thefixed-size pool to sstables

# proportional their recent read rates.  Setting to -1 will disable this

# process, leaving existing index summaries at theircurrent sampling level.

index_summary_resize_interval_in_minutes: 60

 

# Whether to, when doing sequential writing, fsync()at intervals in

# order to force the operating system to flush thedirty

# buffers. Enable this to avoid sudden dirty bufferflushing from

# impacting read latencies. Almost always a good ideaon SSDs; not

# necessarily on platters.

trickle_fsync: false

trickle_fsync_interval_in_kb: 10240

 

# TCP port, for commands and data

# For security reasons, you should not expose thisport to the internet.  Firewall it ifneeded.

storage_port: 7000

 

# SSL port, for encrypted communication.  Unused unless enabled in

# encryption_options

# For security reasons, you should not expose thisport to the internet.  Firewall it ifneeded.

ssl_storage_port: 7001

 

# Address or interface to bind to and tell otherCassandra nodes to connect to.

# You _must_ change this if you want multiple nodes tobe able to communicate!

#

# Set listen_address OR listen_interface, not both.Interfaces must correspond

# to a single address, IP aliasing is not supported.

#

# Leaving it blank leaves it up toInetAddress.getLocalHost(). This

# will always do the Right Thing _if_ the node isproperly configured

# (hostname, name resolution, etc), and the RightThing is to use the

# address associated with the hostname (it might notbe).

#

# Setting listen_address to 0.0.0.0 is always wrong.

#

# If you choose to specify the interface by name andthe interface has an ipv4 and an ipv6 address

# you can specify which should be chosen usinglisten_interface_prefer_ipv6. If false the first ipv4

# address will be used. If true the first ipv6 addresswill be used. Defaults to false preferring

# ipv4. If there is only one address it will beselected regardless of ipv4/ipv6.

listen_address: 172.28.18.10

# listen_interface: eth0

# listen_interface_prefer_ipv6: false

 

# Address to broadcast to other Cassandra nodes

# Leaving this blank will set it to the same value aslisten_address

# broadcast_address: 1.2.3.4

 

# Internode authentication backend, implementingIInternodeAuthenticator;

# used to allow/disallow connections from peer nodes.

# internode_authenticator:org.apache.cassandra.auth.AllowAllInternodeAuthenticator

 

# Whether to start the native transport server.

# Please note that the address on which the nativetransport is bound is the

# same as the rpc_address. The port however isdifferent and specified below.

start_native_transport: true

# port for the CQL native transport to listen forclients on

# For security reasons, you should not expose thisport to the internet.  Firewall it ifneeded.

native_transport_port: 9042

# The maximum threads for handling requests when thenative transport is used.

# This is similar to rpc_max_threads though thedefault differs slightly (and

# there is no native_transport_min_threads, idlethreads will always be stopped

# after 30 seconds).

# native_transport_max_threads: 128

#

# The maximum size of allowed frame. Frame (requests)larger than this will

# be rejected as invalid. The default is 256MB.

# native_transport_max_frame_size_in_mb: 256

 

# The maximum number of concurrent client connections.

# The default is -1, which means unlimited.

# native_transport_max_concurrent_connections: -1

 

# The maximum number of concurrent client connectionsper source ip.

# The default is -1, which means unlimited.

# native_transport_max_concurrent_connections_per_ip:-1

 

# Whether to start the thrift rpc server.

start_rpc: false

 

# The address or interface to bind the Thrift RPCservice and native transport

# server to.

#

# Set rpc_address OR rpc_interface, not both.Interfaces must correspond

# to a single address, IP aliasing is not supported.

#

# Leaving rpc_address blank has the same effect as onlisten_address

# (i.e. it will be based on the configured hostname ofthe node).

#

# Note that unlike listen_address, you can specify0.0.0.0, but you must also

# set broadcast_rpc_address to a value other than0.0.0.0.

#

# For security reasons, you should not expose thisport to the internet.  Firewall it ifneeded.

#

# If you choose to specify the interface by name andthe interface has an ipv4 and an ipv6 address

# you can specify which should be chosen usingrpc_interface_prefer_ipv6. If false the first ipv4

# address will be used. If true the first ipv6 addresswill be used. Defaults to false preferring

# ipv4. If there is only one address it will beselected regardless of ipv4/ipv6.

rpc_address: 172.28.18.10

# rpc_interface: eth1

# rpc_interface_prefer_ipv6: false

 

# port for Thrift to listen for clients on

rpc_port: 9160

 

# RPC address to broadcast to drivers and otherCassandra nodes. This cannot

# be set to 0.0.0.0. If left blank, this will be setto the value of

# rpc_address. If rpc_address is set to 0.0.0.0,broadcast_rpc_address must

# be set.

# broadcast_rpc_address: 1.2.3.4

 

# enable or disable keepalive on rpc/nativeconnections

rpc_keepalive: true

 

# Cassandra provides two out-of-the-box options forthe RPC Server:

#

# sync  ->One thread per thrift connection. For a very large number of clients, memory

#          willbe your limiting factor. On a 64 bit JVM, 180KB is the minimum stack size

#          per thread, and that will correspond to youruse of virtual memory (but physical memory

#          maybe limited depending on use of stack space).

#

# hsha  ->Stands for "half synchronous, half asynchronous." All thrift clientsare handled

#          asynchronouslyusing a small number of threads that does not vary with the amount

#          ofthrift clients (and thus scales well to many clients). The rpc requests arestill

#         synchronous (one thread per active request). If hsha is selected then itis essential

#          thatrpc_max_threads is changed from the default value of unlimited.

#

# The default is sync because on Windows hsha is about30% slower.  On Linux,

# sync/hsha performance is about the same, with hshaof course using less memory.

#

# Alternatively, can provide your own RPC server by providing the fully-qualified classname

# of an o.a.c.t.TServerFactory that can create aninstance of it.

rpc_server_type: sync

 

# Uncomment rpc_min|max_thread to set request poolsize limits.

#

# Regardless of your choice of RPC server (see above),the number of maximum requests in the

# RPC thread pool dictates how many concurrentrequests are possible (but if you are using the sync

# RPC server, it also dictates the number of clientsthat can be connected at all).

#

# The default is unlimited and thus provides no protection against clients overwhelming the server. You are
# encouraged to set a maximum that makes sense for you in production, but do keep in mind that
# rpc_max_threads represents the maximum number of client requests this server may execute concurrently.

#

# rpc_min_threads: 16

# rpc_max_threads: 2048

 

# uncomment to set socket buffer sizes on rpcconnections

# rpc_send_buff_size_in_bytes:

# rpc_recv_buff_size_in_bytes:

 

# Uncomment to set socket buffer size for internodecommunication

# Note that when setting this, the buffer size islimited by net.core.wmem_max

# and when not setting it it is defined bynet.ipv4.tcp_wmem

# See:

# /proc/sys/net/core/wmem_max

# /proc/sys/net/core/rmem_max

# /proc/sys/net/ipv4/tcp_wmem

# /proc/sys/net/ipv4/tcp_wmem

# and: man tcp

# internode_send_buff_size_in_bytes:

# internode_recv_buff_size_in_bytes:

 

# Frame size for thrift (maximum message length).

thrift_framed_transport_size_in_mb: 15

 

# Set to true to have Cassandra create a hard link toeach sstable

# flushed or streamed locally in a backups/subdirectory of the

# keyspace data. Removing these links is the operator's

# responsibility.

incremental_backups: false

 

# Whether or not to take a snapshot before eachcompaction.  Be

# careful using this option, since Cassandra won'tclean up the

# snapshots for you. Mostly useful if you're paranoid when there

# is a data format change.

snapshot_before_compaction: false

 

# Whether or not a snapshot is taken of the databefore keyspace truncation

# or dropping of column families. The STRONGLY adviseddefault of true

# should be used to provide data safety. If you setthis flag to false, you will

# lose data on truncation or drop.

auto_snapshot: true

 

# When executing a scan, within or across a partition,we need to keep the

# tombstones seen in memory so we can return them tothe coordinator, which

# will use them to make sure other replicas also knowabout the deleted rows.

# With workloads that generate a lot of tombstones,this can cause performance

# problems and even exaust the server heap.

# (http://www.datastax.com/dev/blog/cassandra-anti-patterns-queues-and-queue-like-datasets)

# Adjust the thresholds here if you understand thedangers and want to

# scan more tombstones anyway.  These thresholds may also be adjusted atruntime

# using the StorageService mbean.

tombstone_warn_threshold: 1000

tombstone_failure_threshold: 100000

 

# Granularity of the collation index of rows within apartition.

# Increase if your rows are large, or if you have avery large

# number of rows per partition.  The competing goals are these:

#   1) a smallergranularity means more index entries are generated

#      andlooking up rows withing the partition by collation column

#      is faster

#   2) but,Cassandra will keep the collation index in memory for hot

#      rows (aspart of the key cache), so a larger granularity means

#      you cancache more hot rows

column_index_size_in_kb: 64

 

 

# Log WARN on any batch size exceeding this value. 5kbper batch by default.

# Caution should be taken on increasing the size ofthis threshold as it can lead to node instability.

batch_size_warn_threshold_in_kb: 5

 

# Fail any batch exceeding this value. 50kb (10x warnthreshold) by default.

batch_size_fail_threshold_in_kb: 50

 

# Number of simultaneous compactions to allow, NOTincluding

# validation "compactions" for anti-entropyrepair.  Simultaneous

# compactions can help preserve read performance in amixed read/write

# workload, by mitigating the tendency of smallsstables to accumulate

# during a single long running compactions. Thedefault is usually

# fine and if you experience problems with compactionrunning too

# slowly or too fast, you should look at

# compaction_throughput_mb_per_sec first.

#

# concurrent_compactors defaults to the smaller of(number of disks,

# number of cores), with a minimum of 2 and a maximumof 8.

#

# If your data directories are backed by SSD, youshould increase this

# to the number of cores.

#concurrent_compactors: 1

 

# Throttles compaction to the given total throughputacross the entire

# system. The faster you insert data, the faster youneed to compact in

# order to keep the sstable count down, but ingeneral, setting this to

# 16 to 32 times the rate you are inserting data ismore than sufficient.

# Setting this to 0 disables throttling. Note thatthis account for all types

# of compaction, including validation compaction.

compaction_throughput_mb_per_sec: 16

 

# Log a warning when compacting partitions larger thanthis value

compaction_large_partition_warning_threshold_mb: 100

 

# When compacting, the replacement sstable(s) can beopened before they

# are completely written, and used in place of theprior sstables for

# any range that has been written. This helps tosmoothly transfer reads

# between the sstables, reducing page cache churn andkeeping hot rows hot

sstable_preemptive_open_interval_in_mb: 50

 

# Throttles all outbound streaming file transfers onthis node to the

# given total throughput in Mbps. This is necessarybecause Cassandra does

# mostly sequential IO when streaming data duringbootstrap or repair, which

# can lead to saturating the network connection anddegrading rpc performance.

# When unset, the default is 200 Mbps or 25 MB/s.

# stream_throughput_outbound_megabits_per_sec: 200

 

# Throttles all streaming file transfer between thedatacenters,

# this setting allows users to throttle inter dcstream throughput in addition

# to throttling all network stream traffic asconfigured with

# stream_throughput_outbound_megabits_per_sec

#inter_dc_stream_throughput_outbound_megabits_per_sec:

 

# How long the coordinator should wait for readoperations to complete

read_request_timeout_in_ms: 5000

# How long the coordinator should wait for seq orindex scans to complete

range_request_timeout_in_ms: 10000

# How long the coordinator should wait for writes tocomplete

write_request_timeout_in_ms: 2000

# How long the coordinator should wait for counterwrites to complete

counter_write_request_timeout_in_ms: 5000

# How long a coordinator should continue to retry aCAS operation

# that contends with other proposals for the same row

cas_contention_timeout_in_ms: 1000

# How long the coordinator should wait for truncatesto complete

# (This can be much longer, because unlessauto_snapshot is disabled

# we need to flush first so we can snapshot beforeremoving the data.)

truncate_request_timeout_in_ms: 60000

# The default timeout for other, miscellaneousoperations

request_timeout_in_ms: 10000

 

# Enable operation timeout information exchangebetween nodes to accurately

# measure request timeouts.  If disabled, replicas will assume thatrequests

# were forwarded to them instantly by the coordinator,which means that

# under overload conditions we will waste that muchextra time processing

# already-timed-out requests.

#

# Warning: before enabling this property make sure ntp is installed

# and the times are synchronized between the nodes.

cross_node_timeout: false

 

# Enable socket timeout for streaming operation.

# When a timeout occurs during streaming, streaming isretried from the start

# of the current file. This _can_ involve re-streamingan important amount of

# data, so you should avoid setting the value too low.

# Default value is 3600000, which means streamstimeout after an hour.

# streaming_socket_timeout_in_ms: 3600000

 

# phi value that must be reached for a host to bemarked down.

# most users should never need to adjust this.

# phi_convict_threshold: 8

 

# endpoint_snitch -- Set this to a class thatimplements

# IEndpointSnitch. The snitch has two functions:

# - it teaches Cassandra enough about your networktopology to route

#   requestsefficiently

# - it allows Cassandra to spread replicas around yourcluster to avoid

#   correlatedfailures. It does this by grouping machines into

#  "datacenters" and "racks."  Cassandra will do its best not to have

#   more thanone replica on the same "rack" (which may not actually

#   be aphysical location)

#

# IF YOU CHANGE THE SNITCH AFTER DATA IS INSERTED INTOTHE CLUSTER,

# YOU MUST RUN A FULL REPAIR, SINCE THE SNITCH AFFECTSWHERE REPLICAS

# ARE PLACED.

#

# IF THE RACK A REPLICA IS PLACED IN CHANGES AFTER THEREPLICA HAS BEEN

# ADDED TO A RING, THE NODE MUST BE DECOMMISSIONED ANDREBOOTSTRAPPED.

#

# Out of the box, Cassandra provides

#  -SimpleSnitch:

#    TreatsStrategy order as proximity. This can improve cache

#    localitywhen disabling read repair.  Onlyappropriate for

#   single-datacenter deployments.

#  -GossipingPropertyFileSnitch

#    This shouldbe your go-to snitch for production use. The rack

#    anddatacenter for the local node are defined in

#   cassandra-rackdc.properties and propagated to other nodes via

#   gossip.  Ifcassandra-topology.properties exists, it is used as a

#    fallback,allowing migration from the PropertyFileSnitch.

#  -PropertyFileSnitch:

#    Proximityis determined by rack and data center, which are

#    explicitlyconfigured in cassandra-topology.properties.

#  - Ec2Snitch:

#    Appropriatefor EC2 deployments in a single Region. Loads Region

#    andAvailability Zone information from the EC2 API. The Region is

#    treated asthe datacenter, and the Availability Zone as the rack.

#    Onlyprivate IPs are used, so this will not work across multiple

#    Regions.

#  -Ec2MultiRegionSnitch:

#    Uses publicIPs as broadcast_address to allow cross-region

#   connectivity.  (Thus, you shouldset seed addresses to the public

#    IP aswell.) You will need to open the storage_port or

#   ssl_storage_port on the public IP firewall.  (For intra-Region

#    traffic,Cassandra will switch to the private IP after

#   establishing a connection.)

#  -RackInferringSnitch:

#    Proximityis determined by rack and data center, which are

#    assumed tocorrespond to the 3rd and 2nd octet of each node's IP

#    address,respectively.  Unless this happens tomatch your

#    deploymentconventions, this is best used as an example of

#    writing acustom Snitch class and is provided in that spirit.

#

# You can use a custom Snitch by setting this to thefull class name

# of the snitch, which will be assumed to be on yourclasspath.

endpoint_snitch: SimpleSnitch #假设所有服务器都在同一个机架里面,就没有机架策略了;假设所有机器都是平等的,分布在同一个局域网里面。

 

# controls how often to perform the more expensivepart of host score

# calculation

dynamic_snitch_update_interval_in_ms: 100

# controls how often to reset all host scores,allowing a bad host to

# possibly recover

dynamic_snitch_reset_interval_in_ms: 600000

# if set greater than zero and read_repair_chance is< 1.0, this will allow

# 'pinning' of replicas to hosts in order to increasecache capacity.

# The badness threshold will control how much worsethe pinned host has to be

# before the dynamic snitch will prefer other replicasover it.  This is

# expressed as a double which represents apercentage.  Thus, a value of

# 0.2 means Cassandra would continue to prefer thestatic snitch values

# until the pinned host was 20% worse than thefastest.

dynamic_snitch_badness_threshold: 0.1

 

# request_scheduler -- Set this to a class thatimplements

# RequestScheduler, which will schedule incoming clientrequests

# according to the specific policy. This is useful formulti-tenancy

# with a single Cassandra cluster.

# NOTE: This is specifically for requests from theclient and does

# not affect inter node communication.

# org.apache.cassandra.scheduler.NoScheduler - Noscheduling takes place

# org.apache.cassandra.scheduler.RoundRobinScheduler -Round robin of

# client requests to a node with a separate queue foreach

# request_scheduler_id. The scheduler is furthercustomized by

# request_scheduler_options as described below.

request_scheduler: org.apache.cassandra.scheduler.NoScheduler

 

# Scheduler Options vary based on the type ofscheduler

# NoScheduler - Has no options

# RoundRobin

#  -throttle_limit -- The throttle_limit is the number of in-flight

#                     requests per client.  Requestsbeyond

#                     that limit are queued up until

#                     running requests can complete.

#                     The value of 80 here is twice the number of

#                      concurrent_reads + concurrent_writes.

#  -default_weight -- default_weight is optional and allows for

#                     overriding the default which is 1.

#  - weights --Weights are optional and will default to 1 or the

#              overridden default_weight. The weight translates into how

#              many requests are handled during each turn of the

#              RoundRobin, based on the scheduler id.

#

# request_scheduler_options:

#   throttle_limit: 80

#   default_weight: 5

#    weights:

#     Keyspace1: 1

#     Keyspace2: 5

 

# request_scheduler_id -- An identifier based on whichto perform

# the request scheduling. Currently the only validoption is keyspace.

# request_scheduler_id: keyspace

 

# Enable or disable inter-node encryption

# Default settings are TLS v1, RSA 1024-bit keys (itis imperative that

# users generate their own keys)TLS_RSA_WITH_AES_128_CBC_SHA as the cipher

# suite for authentication, key exchange andencryption of the actual data transfers.

# Use the DHE/ECDHE ciphers if running in FIPS 140compliant mode.

# NOTE: No custom encryption options are enabled atthe moment

# The available internode options are : all, none, dc,rack

#

# If set to dc cassandra will encrypt the trafficbetween the DCs

# If set to rack cassandra will encrypt the trafficbetween the racks

#

# The passwords used in these options must match thepasswords used when generating

# the keystore and truststore.  For instructions on generating these files,see:

# http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeyst

ore

#

server_encryption_options:

    internode_encryption: none

    keystore: conf/.keystore

    keystore_password: cassandra

    truststore: conf/.truststore

    truststore_password: cassandra

    # Moreadvanced defaults below:

    # protocol:TLS

    # algorithm:SunX509

    #store_type: JKS

    #cipher_suites:[TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_12

8_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_25

6_CBC_SHA]

    #require_client_auth: false

 

# enable or disable client/server encryption.

client_encryption_options:

    enabled: false

    # If enabled and optional is set to true encrypted and unencrypted connections are handled.

    optional: false

    keystore: conf/.keystore

    keystore_password: cassandra

    #require_client_auth: false

    # Settrustore and truststore_password if require_client_auth is true

    #truststore: conf/.truststore

    #truststore_password: cassandra

    # Moreadvanced defaults below:

    # protocol:TLS

    # algorithm:SunX509

    #store_type: JKS

    #cipher_suites:[TLS_RSA_WITH_AES_128_CBC_SHA,TLS_RSA_WITH_AES_256_CBC_SHA,TLS_DHE_RSA_WITH_AES_12

8_CBC_SHA,TLS_DHE_RSA_WITH_AES_256_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA,TLS_ECDHE_RSA_WITH_AES_25

6_CBC_SHA]

 

# internode_compression controls whether trafficbetween nodes is

# compressed.

# can be: all  - all traffic is compressed

#         dc   - traffic between differentdatacenters is compressed

#          none- nothing is compressed.

internode_compression: all

 

# Enable or disable tcp_nodelay for inter-dccommunication.

# Disabling it will result in larger (but fewer)network packets being sent,

# reducing overhead from the TCP protocol itself, atthe cost of increasing

# latency if you block for cross-datacenter responses.

inter_dc_tcp_nodelay: false

 

# TTL for different trace types used during logging ofthe repair process.

tracetype_query_ttl: 86400

tracetype_repair_ttl: 604800

 

# GC Pauses greater than gc_warn_threshold_in_ms willbe logged at WARN level

# Adjust the threshold based on your applicationthroughput requirement

# By default, Cassandra logs GC Pauses greater than200 ms at INFO level

# gc_warn_threshold_in_ms: 1000

 

# UDFs (user defined functions) are disabled bydefault.

# As of Cassandra 2.2, there is no security manager oranything else in place that

# prevents execution of evil code. CASSANDRA-9402 willfix this issue for Cassandra 3.0.

# This will inherently be backwards-incompatible withany 2.2 UDF that perform insecure

# operations such as opening a socket or writing tothe filesystem.

enable_user_defined_functions: false

 

# The default Windows kernel timer and schedulingresolution is 15.6ms for power conservation.

# Lowering this value on Windows can provide much tighterlatency and better throughput, however

# some virtualized environments may see a negativeperformance impact from changing this setting

# below their system default. The sysinternals'clockres' tool can confirm your system's default

# setting.

windows_timer_interval: 1

 

 

 

 

、、、、、、

endpoint_snitch:SimpleSnitch--假设所有服务器都在同一个机架里面,就没有机架策略了;

假设所有机器都是平等的,分布在同一个局域网里面。

 

 

编辑配置文件logback.xml

/data/users/cassandra01/cassandra/logs/system.log

 

配置cassandra-env.sh

vi cassandra-env.sh

MAX_HEAP_SIZE根据系统物理内存设置;HEAP_NEWSIZE设置为MAX_HEAP_SIZE/4;这两个参数会自动设置,可以去掉注释指定具体值.

----------------------------------------------------------

MAX_HEAP_SIZE="4G"

HEAP_NEWSIZE="800M"

 

 

 

 

 

 

配置完毕10节点,将配置好的scp1120节点

$ scp -r cassandra/ cassandra01@mongodb-test8:/data/users/cassandra01

$ scp -r cassandra/ cassandra01@mongodb-test9:/data/users/cassandra01

 

在各个节点上对应修改cassandra.yaml(只修改如下)

11主机

listen_address: 172.28.18.11

rpc_address: 172.28.18.11

 

20主机

listen_address: 172.28.18.20

rpc_address: 172.28.18.20

 

 

按顺序启动

seeds,后main(10主机)

 

11主机

cassandra -f

 

20主机

cassandra -f

 

10主机

cassandra -f

 

这些启动都是在前台启动,使用&在后台启动运行

 

10主机main启动cli

(需要事先安装python2.7以上版本)

cqlsh mongodb-test7

 

 

测试

create keyspace mykeyspace with replication = {'class':'SimpleStrategy','replication_factor':1};

create keyspace excelsior with replication = {'class':'SimpleStrategy','replication_factor':3};

create keyspace "Excalibur" with replication= {'class':'NetworkTopologyStrategy','dc1':3,'dc2':2};

第一个副本因子是3个,第二个副本因子是2个;

use mykeyspace;

create table users(userid int primary key, fn text, ln text, number int);

 

insert into users(userid, number) values(1 ,2);

insert into users(userid, fn, ln, number) values(1, 'a','b', 3);

 

select avg(number) from users;

注意:

如果出现这样的错误:

No single argument constructor found for class[Ljava.lang.String;……

 

有可能是cassandra.yaml配置文件的格式有问题

注意!

data_file_directories:

 - /cassandradata/data

 

insert into users(userid, fn, ln, number) values(2,'2a', '2b', 23);

insert into users(userid, fn, ln, number) values(3,'2a', '2b', 33);

insert into users(userid, fn, ln, number) values(4,'2a', '2b', 33);

insert into users(userid, fn, ln, number) values(5,'2a', '2b', 33);

insert into users(userid, fn, ln, number) values(6,'2a', '2b', 33);

insert into users(userid, fn, ln, number) values(7,'2a', '2b', 33);

insert into users(userid, fn, ln, number) values(8,'2a', '2b', 33);