From owner-freebsd-cluster@FreeBSD.ORG  Mon Aug  6 08:20:29 2007
Return-Path: <owner-freebsd-cluster@FreeBSD.ORG>
Delivered-To: freebsd-cluster@freebsd.org
Received: from mx1.freebsd.org (mx1.freebsd.org [IPv6:2001:4f8:fff6::34])
	by hub.freebsd.org (Postfix) with ESMTP id 90AB316A498
	for <freebsd-cluster@freebsd.org>; Mon,  6 Aug 2007 08:20:29 +0000 (UTC)
	(envelope-from johndecot@yahoo.com)
Received: from web55404.mail.re4.yahoo.com (web55404.mail.re4.yahoo.com
	[206.190.58.198])
	by mx1.freebsd.org (Postfix) with SMTP id 2E5EE13C474
	for <freebsd-cluster@freebsd.org>; Mon,  6 Aug 2007 08:20:29 +0000 (UTC)
	(envelope-from johndecot@yahoo.com)
Received: (qmail 37503 invoked by uid 60001); 6 Aug 2007 07:53:47 -0000
DomainKey-Signature: a=rsa-sha1; q=dns; c=nofws; s=s1024; d=yahoo.com;
	h=X-YMail-OSG:Received:Date:From:Subject:To:MIME-Version:Content-Type:Content-Transfer-Encoding:Message-ID;
	b=1aLm8Cp+pZYDp3zBiKmdu0CJ0Xbo9es38UaTZ8WVYfjEWXhorMwiaOGCWTgS/dg84N/vofjJITCOe3Wtx1SzJRoazmh6gDTORCFxVbG/TK+P8nec/QiP03fXD1bvJZekKsSB6vRbhPDR9AcJ31XOOSOb3PSrf/DbGLwzR4UaA/o=;
X-YMail-OSG: 0EtiF2cVM1kbMjpxo.JG3A6M3y.moL65g3dU0zHrUKG7V8j7bO8UHe3YxOhP84DPai0NT3R00CXHJp0H8LuMheRobB9K9WXP44gjzMUdChT8Q_iaBOXeBlVf4mPE8.OzNlGzg.Ht8raCKeD60Jm.Nb4EO1oTePuVif21HMCn8OILxQc8cs8OKFsktcyJ.fNAgi1I5thS7FPHQweFB8g-
Received: from [63.219.2.3] by web55404.mail.re4.yahoo.com via HTTP;
	Mon, 06 Aug 2007 00:53:46 PDT
Date: Mon, 6 Aug 2007 00:53:46 -0700 (PDT)
From: john decot <johndecot@yahoo.com>
To: freebsd-cluster@freebsd.org
MIME-Version: 1.0
Message-ID: <962834.37025.qm@web55404.mail.re4.yahoo.com>
Content-Type: text/plain; charset=iso-8859-1
Content-Transfer-Encoding: 8bit
X-Content-Filtered-By: Mailman/MimeDel 2.1.5
Subject: metric problem 
X-BeenThere: freebsd-cluster@freebsd.org
X-Mailman-Version: 2.1.5
Precedence: list
List-Id: Clustering FreeBSD <freebsd-cluster.freebsd.org>
List-Unsubscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-cluster>, 
	<mailto:freebsd-cluster-request@freebsd.org?subject=unsubscribe>
List-Archive: <http://lists.freebsd.org/pipermail/freebsd-cluster>
List-Post: <mailto:freebsd-cluster@freebsd.org>
List-Help: <mailto:freebsd-cluster-request@freebsd.org?subject=help>
List-Subscribe: <http://lists.freebsd.org/mailman/listinfo/freebsd-cluster>,
	<mailto:freebsd-cluster-request@freebsd.org?subject=subscribe>
X-List-Received-Date: Mon, 06 Aug 2007 08:20:29 -0000

Hi all,
   
     As I am a new user of clustering, I am trying LAM/MPI with Ganglia. I have run into a problem while monitoring Ganglia through the web interface, i.e. it can't locate any metrics for the selected cluster.  I ran "telnet 127.0.0.1 8652" and the result shows the cluster without any metrics:

Trying ::1...
Trying 127.0.0.1...
Connected to localhost.
Escape character is '^]'.
<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>
<!DOCTYPE GANGLIA_XML [
   <!ELEMENT GANGLIA_XML (GRID|CLUSTER|HOST)*>
      <!ATTLIST GANGLIA_XML VERSION CDATA #REQUIRED>
      <!ATTLIST GANGLIA_XML SOURCE CDATA #REQUIRED>
   <!ELEMENT GRID (CLUSTER | GRID | HOSTS | METRICS)*>
      <!ATTLIST GRID NAME CDATA #REQUIRED>
      <!ATTLIST GRID AUTHORITY CDATA #REQUIRED>
      <!ATTLIST GRID LOCALTIME CDATA #IMPLIED>
   <!ELEMENT CLUSTER (HOST | HOSTS | METRICS)*>
      <!ATTLIST CLUSTER NAME CDATA #REQUIRED>
      <!ATTLIST CLUSTER OWNER CDATA #IMPLIED>
      <!ATTLIST CLUSTER LATLONG CDATA #IMPLIED>
      <!ATTLIST CLUSTER URL CDATA #IMPLIED>
      <!ATTLIST CLUSTER LOCALTIME CDATA #REQUIRED>
   <!ELEMENT HOST (METRIC)*>
      <!ATTLIST HOST NAME CDATA #REQUIRED>
      <!ATTLIST HOST IP CDATA #REQUIRED>
      <!ATTLIST HOST LOCATION CDATA #IMPLIED>
      <!ATTLIST HOST REPORTED CDATA #REQUIRED>
      <!ATTLIST HOST TN CDATA #IMPLIED>
      <!ATTLIST HOST TMAX CDATA #IMPLIED>
      <!ATTLIST HOST DMAX CDATA #IMPLIED>
      <!ATTLIST HOST GMOND_STARTED CDATA #IMPLIED>
   <!ELEMENT METRIC EMPTY>
      <!ATTLIST METRIC NAME CDATA #REQUIRED>
      <!ATTLIST METRIC VAL CDATA #REQUIRED>
      <!ATTLIST METRIC TYPE (string | int8 | uint8 | int16 | uint16 | int32 | uint32 | float | double | timestamp) #REQUIRED>
      <!ATTLIST METRIC UNITS CDATA #IMPLIED>
      <!ATTLIST METRIC TN CDATA #IMPLIED>
      <!ATTLIST METRIC TMAX CDATA #IMPLIED>
      <!ATTLIST METRIC DMAX CDATA #IMPLIED>
      <!ATTLIST METRIC SLOPE (zero | positive | negative | both | unspecified) #IMPLIED>
      <!ATTLIST METRIC SOURCE (gmond | gmetric) #REQUIRED>
   <!ELEMENT HOSTS EMPTY>
      <!ATTLIST HOSTS UP CDATA #REQUIRED>
      <!ATTLIST HOSTS DOWN CDATA #REQUIRED>
      <!ATTLIST HOSTS SOURCE (gmond | gmetric | gmetad) #REQUIRED>
   <!ELEMENT METRICS EMPTY>
      <!ATTLIST METRICS NAME CDATA #REQUIRED>
      <!ATTLIST METRICS SUM CDATA #REQUIRED>
      <!ATTLIST METRICS NUM CDATA #REQUIRED>
      <!ATTLIST METRICS TYPE (string | int8 | uint8 | int16 | uint16 | int32 | uint32 | float | double | timestamp) #REQUIRED>
      <!ATTLIST METRICS UNITS CDATA #IMPLIED>
      <!ATTLIST METRICS SLOPE (zero | positive | negative | both | unspecified) #IMPLIED>
      <!ATTLIST METRICS SOURCE (gmond | gmetric) #REQUIRED>
]>
<GANGLIA_XML VERSION="3.0.3" SOURCE="gmetad">
<GRID NAME="unspecified" AUTHORITY="http://ms.test.com/ganglia/" LOCALTIME="1186406352">
<CLUSTER NAME="my cluster" LOCALTIME="1186406351" OWNER="unspecified" LATLONG="unspecified" URL="unspecified">
</CLUSTER>
</GRID>
</GANGLIA_XML>


gmond.conf is as follows:
/* This configuration is as close to 2.5.x default behavior as possible 
   The values closely match ./gmond/metric.h definitions in 2.5.x */ 
globals {                    
  daemonize = yes              
  setuid = yes            
  user = ganglia              
  debug_level = 0               
  max_udp_msg_len = 1472        
  mute = no             
  deaf = no             
  host_dmax = 0 /*secs */ 
  cleanup_threshold = 300 /*secs */ 
  gexec = no
} 

/* If a cluster attribute is specified, then all gmond hosts are wrapped inside 
 * of a <CLUSTER> tag.  If you do not specify a cluster tag, then all <HOSTS> will 
 * NOT be wrapped inside of a <CLUSTER> tag. */ 
cluster { 
  name = "my cluster" 
  owner = "unspecified" 
  latlong = "unspecified" 
  url = "unspecified" 
} 

/* The host section describes attributes of the host, like the location */ 
host { 
  location = "unspecified" 
} 

/* Feel free to specify as many udp_send_channels as you like.  Gmond 
   used to only support having a single channel */  
udp_send_channel { 
  #mcast_join = 239.2.11.71 
  port = 8649 
} 

/* You can specify as many udp_recv_channels as you like as well. */ 
udp_recv_channel { 
#  mcast_join = 239.2.11.71 
  port = 8649 
#  bind = 239.2.11.71 
} 

/* You can specify as many tcp_accept_channels as you like to share 
   an xml description of the state of the cluster */ 
tcp_accept_channel { 
  port = 8649 
} 


/* The old internal 2.5.x metric array has been replaced by the following 
   collection_group directives.  What follows is the default behavior for 
   collecting and sending metrics that is as close to 2.5.x behavior as 
   possible. */

/* This collection group will cause a heartbeat (or beacon) to be sent every 
   20 seconds.  In the heartbeat is the GMOND_STARTED data which expresses 
   the age of the running gmond. */ 
collection_group { 
  collect_once = yes 
  time_threshold = 20 
  metric { 
    name = "heartbeat" 
  } 
} 

/* This collection group will send general info about this host every 1200 secs. 
   This information doesn't change between reboots and is only collected once. */ 
collection_group { 
  collect_once = yes 
  time_threshold = 1200 
  metric { 
    name = "cpu_num" 
  } 
  metric { 
    name = "cpu_speed" 
  } 
  metric { 
    name = "mem_total" 
  } 
  /* Should this be here? Swap can be added/removed between reboots. */ 
  metric { 
    name = "swap_total" 
  } 
  metric { 
    name = "boottime" 
  } 
  metric { 
    name = "machine_type" 
  } 
  metric { 
    name = "os_name" 
  } 
  metric { 
    name = "os_release" 
  } 
  metric { 
    name = "location" 
  } 
} 

/* This collection group will send the status of gexecd for this host every 300 secs */
/* Unlike 2.5.x the default behavior is to report gexecd OFF.  */ 
collection_group { 
  collect_once = yes 
  time_threshold = 300 
  metric { 
    name = "gexec" 
  } 
} 

/* This collection group will collect the CPU status info every 20 secs. 
   The time threshold is set to 90 seconds.  In honesty, this time_threshold could be 
   set significantly higher to reduce unnecessary network chatter. */ 
collection_group { 
  collect_every = 20 
  time_threshold = 90 
  /* CPU status */ 
  metric { 
    name = "cpu_user"  
    value_threshold = "1.0" 
  } 
  metric { 
    name = "cpu_system"   
    value_threshold = "1.0" 
  } 
  metric { 
    name = "cpu_idle"  
    value_threshold = "5.0" 
  } 
  metric { 
    name = "cpu_nice"  
    value_threshold = "1.0" 
  } 
  metric { 
    name = "cpu_aidle" 
    value_threshold = "5.0" 
  } 
  metric { 
    name = "cpu_wio" 
    value_threshold = "1.0" 
  } 
  /* The next two metrics are optional if you want more detail... 
     ... since they are accounted for in cpu_system.  
  metric { 
    name = "cpu_intr" 
    value_threshold = "1.0" 
  } 
  metric { 
    name = "cpu_sintr" 
    value_threshold = "1.0" 
  } 
  */ 
} 

collection_group { 
  collect_every = 20 
  time_threshold = 90 
  /* Load Averages */ 
  metric { 
    name = "load_one" 
    value_threshold = "1.0" 
  } 
  metric { 
    name = "load_five" 
    value_threshold = "1.0" 
  } 
  metric { 
    name = "load_fifteen" 
    value_threshold = "1.0" 
  }
} 

/* This group collects the number of running and total processes */ 
collection_group { 
  collect_every = 80 
  time_threshold = 950 
  metric { 
    name = "proc_run" 
    value_threshold = "1.0" 
  } 
  metric { 
    name = "proc_total" 
    value_threshold = "1.0" 
  } 
}

/* This collection group grabs the volatile memory metrics every 40 secs and 
   sends them at least every 180 secs.  This time_threshold can be increased 
   significantly to reduce unneeded network traffic. */ 
collection_group { 
  collect_every = 40 
  time_threshold = 180 
  metric { 
    name = "mem_free" 
    value_threshold = "1024.0" 
  } 
  metric { 
    name = "mem_shared" 
    value_threshold = "1024.0" 
  } 
  metric { 
    name = "mem_buffers" 
    value_threshold = "1024.0" 
  } 
  metric { 
    name = "mem_cached" 
    value_threshold = "1024.0" 
  } 
  metric { 
    name = "swap_free" 
    value_threshold = "1024.0" 
  } 
} 

collection_group { 
  collect_every = 40 
  time_threshold = 300 
  metric { 
    name = "bytes_out" 
    value_threshold = 4096 
  } 
  metric { 
    name = "bytes_in" 
    value_threshold = 4096 
  } 
  metric { 
    name = "pkts_in" 
    value_threshold = 256 
  } 
  metric { 
    name = "pkts_out" 
    value_threshold = 256 
  } 
}

/* Different than 2.5.x default since the old config made no sense */ 
collection_group { 
  collect_every = 1800 
  time_threshold = 3600 
  metric { 
    name = "disk_total" 
    value_threshold = 1.0 
  } 
}

collection_group { 
  collect_every = 40 
  time_threshold = 180 
  metric { 
    name = "disk_free" 
    value_threshold = 1.0 
  } 
  metric { 
    name = "part_max_used" 
    value_threshold = 1.0 
  } 
}

gmetad.conf is as follows:
# This is an example of a Ganglia Meta Daemon configuration file
#                http://ganglia.sourceforge.net/
#
# $Id: gmetad.conf,v 1.17 2005/03/15 18:15:05 massie Exp $
#
#-------------------------------------------------------------------------------
# Setting the debug_level to 1 will keep the daemon in the foreground and
# show only error messages. Setting this value higher than 1 will make 
# gmetad output debugging information and stay in the foreground.
# default: 0
# debug_level 10
#
#-------------------------------------------------------------------------------
# What to monitor. The most important section of this file. 
#
# The data_source tag specifies either a cluster or a grid to
# monitor. If we detect the source is a cluster, we will maintain a complete
# set of RRD databases for it, which can be used to create historical 
# graphs of the metrics. If the source is a grid (it comes from another gmetad),
# we will only maintain summary RRDs for it.
#
# Format: 
# data_source "my cluster" [polling interval] address1:port address2:port ...
# 
# The keyword 'data_source' must immediately be followed by a unique
# string which identifies the source, then an optional polling interval in 
# seconds. The source will be polled at this interval on average. 
# If the polling interval is omitted, 15 seconds is assumed. 
#
# A list of machines which service the data source follows, in the 
# format ip:port, or name:port. If a port is not specified then 8649
# (the default gmond port) is assumed.
# default: There is no default value
#
# data_source "my cluster" 10 localhost  my.machine.edu:8649  1.2.3.5:8655
# data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651
# data_source "another source" 1.3.4.7:8655  1.3.4.8
data_source "my cluster" 10 localhost

#
# Round-Robin Archives
# You can specify custom Round-Robin archives here (defaults are listed below)
#
# RRAs "RRA:AVERAGE:0.5:1:240" "RRA:AVERAGE:0.5:24:240" "RRA:AVERAGE:0.5:168:240" "RRA:AVERAGE:0.5:672:240" \
#      "RRA:AVERAGE:0.5:5760:370"
#

#
#-------------------------------------------------------------------------------
# Scalability mode. If on, we summarize over downstream grids, and respect
# authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output
# in <GRID></GRID> tags, we ignore all <GRID> tags we see, and always assume
# we are the "authority" on data source feeds. This approach does not scale to
# large groups of clusters, but is provided for backwards compatibility.
# default: on
# scalable off
#
#-------------------------------------------------------------------------------
# The name of this Grid. All the data sources above will be wrapped in a GRID
# tag with this name.
# default: Unspecified
# gridname "MyGrid"
#
#-------------------------------------------------------------------------------
# The authority URL for this grid. Used by other gmetads to locate graphs
# for our data sources. Generally points to a ganglia/
# website on this machine.
# default: "http://hostname/ganglia/",
#   where hostname is the name of this machine, as defined by gethostname().
# authority "http://mycluster.org/newprefix/"
#
#-------------------------------------------------------------------------------
# List of machines this gmetad will share XML with. Localhost
# is always trusted. 
# default: There is no default value
# trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org
#
#-------------------------------------------------------------------------------
# If you want any host which connects to the gmetad XML to receive
# data, then set this value to "on"
# default: off
# all_trusted on
#
#-------------------------------------------------------------------------------
# If you don't want gmetad to setuid then set this to off
# default: on
# setuid off
#
#-------------------------------------------------------------------------------
# User gmetad will setuid to (defaults to "ganglia")
# default: "ganglia"
# setuid_username "ganglia"
#
#-------------------------------------------------------------------------------
# The port gmetad will answer requests for XML
# default: 8651
# xml_port 8651
#
#-------------------------------------------------------------------------------
# The port gmetad will answer queries for XML. This facility allows
# simple subtree and summation views of the XML tree.
# default: 8652
# interactive_port 8652
#
#-------------------------------------------------------------------------------
# The number of threads answering XML requests
# default: 4
# server_threads 10
#
#-------------------------------------------------------------------------------
# Where gmetad stores its round-robin databases
# default: "/var/db/ganglia/rrds"
# rrd_rootdir "/some/other/place"


So, could someone help me get the metrics to show up?



Regards,
John

 
 



       
---------------------------------
Park yourself in front of a world of choices in alternative vehicles.
Visit the Yahoo! Auto Green Center.