Subscribe To Corey's Blog Subscribe To Goldblog goldb.org home

corestats.py | Python Statistical Calculations

statistical calculation class for processing numeric sequences

Code:


#!/usr/bin/env python

#  corestats.py (COREy STATS) 
#  Copyright (c) 2006-2007, Corey Goldberg (corey@goldb.org)
#    
#    statistical calculation class
#    for processing numeric sequences
#
#  license: GNU LGPL
#
#  This library is free software; you can redistribute it and/or
#  modify it under the terms of the GNU Lesser General Public
#  License as published by the Free Software Foundation; either
#  version 2.1 of the License, or (at your option) any later version.



import sys


class Stats:
    """Basic descriptive statistics over a sequence of numbers.

    Every item of the input is converted to ``float`` once, at construction
    time, and stored in ``self.sequence`` in its original order.  None of the
    calculation methods modify ``self.sequence``.

    Methods whose result is undefined for the stored data (for example any
    statistic of an empty sequence) return ``None`` instead of raising.
    """

    def __init__(self, sequence):
        """Store ``sequence`` as a list of floats.

        ``sequence`` is any iterable whose items are accepted by ``float()``;
        an item that cannot be converted makes ``float()`` raise.
        """
        # convert once up front so all later arithmetic is floating point
        self.sequence = [float(item) for item in sequence]

    def sum(self):
        """Return the total of all values, or None for an empty sequence."""
        if not self.sequence:
            return None
        # inside a method body the bare name ``sum`` is the builtin,
        # not this method
        return sum(self.sequence)

    def count(self):
        """Return the number of values (0 for an empty sequence)."""
        return len(self.sequence)

    def min(self):
        """Return the smallest value, or None for an empty sequence."""
        if not self.sequence:
            return None
        return min(self.sequence)

    def max(self):
        """Return the largest value, or None for an empty sequence."""
        if not self.sequence:
            return None
        return max(self.sequence)

    def avg(self):
        """Return the arithmetic mean, or None for an empty sequence."""
        if not self.sequence:
            return None
        return sum(self.sequence) / len(self.sequence)

    def median(self):
        """Return the median, or None for an empty sequence.

        For an odd number of values this is the middle value of the sorted
        data; for an even number it is the mean of the two middle values.
        """
        if not self.sequence:
            return None
        # sort a copy so the caller-visible order of self.sequence is kept
        ordered = sorted(self.sequence)
        mid = len(ordered) // 2
        if len(ordered) % 2:
            return ordered[mid]
        return (ordered[mid - 1] + ordered[mid]) / 2.0

    def stdev(self):
        """Return the sample standard deviation (n - 1 denominator).

        Returns None when there are fewer than two values, because the
        n - 1 denominator would be zero or negative.
        """
        n = len(self.sequence)
        if n < 2:
            return None
        avg = self.avg()
        sdsq = sum((i - avg) ** 2 for i in self.sequence)
        return (sdsq / (n - 1)) ** .5

    def percentile(self, percentile):
        """Return the value found at the given percentile of the sorted data.

        The value is picked by position: the element at index
        ``int(count * percentile / 100.0)`` of the sorted values.  No
        interpolation between neighbouring values is done.

        ``percentile`` must satisfy ``0 <= percentile < 100``.  For a value
        outside that range an error line is written to stderr and None is
        returned.  None is also returned for an empty sequence.
        """
        if not self.sequence:
            return None
        if percentile >= 100:
            sys.stderr.write('ERROR: percentile must be < 100.  you supplied: %s\n'% percentile)
            return None
        if percentile < 0:
            # a negative percentile would turn into a negative list index
            # and silently read from the wrong end of the data
            sys.stderr.write('ERROR: percentile must be >= 0.  you supplied: %s\n' % percentile)
            return None
        element_idx = int(len(self.sequence) * (percentile / 100.0))
        # sort a copy so the caller-visible order of self.sequence is kept
        return sorted(self.sequence)[element_idx]
        
        
        

# Sample script using this class:
# -------------------------------------------    
#    #!/usr/bin/env python
#    import corestats
#    
#    sequence = [1, 2.5, 7, 13.4, 8.0]
#    stats = corestats.Stats(sequence)
#    print(stats.avg())
#    print(stats.percentile(90))
# -------------------------------------------  
        
Copyright © 2006-2007 Corey Goldberg  |