Friday, November 15, 2024
Google search engine
Home > Languages > Create a cumulative histogram in Matplotlib

Create a cumulative histogram in Matplotlib

A histogram is a graphical representation of the distribution of data. Any kind of numeric data can be represented as a histogram. In this article, we are going to see how to create a cumulative histogram in Matplotlib.

Cumulative frequency: Cumulative frequency analysis is the analysis of how often values up to a given reference value occur. The cumulative frequency of an entry is its own frequency plus the frequencies of all the entries before it in the frequency distribution.

Example:

If x contains the frequencies [1, 2, 3, 4, 5], then the cumulative frequencies for x are [1, 3, 6, 10, 15].

Explanation:

[1,1+2,1+2+3,1+2+3+4,1+2+3+4+5]

In Python, we can generate a histogram with a hist() function (for example Matplotlib's Axes.hist or pandas' DataFrame.hist), and compute the cumulative frequency histogram with scipy's stats.cumfreq().

Example 1:

Python3




# importing pyplot for getting graph
import matplotlib.pyplot as plt
 
# importing numpy for getting array
import numpy as np
 
# importing scientific python
from scipy import stats
 
# Example 1: draw a plain histogram and a cumulative histogram of the same
# sample side by side.

# seeded random generator, so every run draws the same sample
rng = np.random.RandomState(seed=12345)

# drawing 1000 samples from a normal distribution
samples = stats.norm.rvs(size=1000,
                         random_state=rng)

# cumulative frequency of the samples over 25 equal-width bins;
# the result carries cumcount, lowerlimit and binsize
res = stats.cumfreq(samples,
                    numbins=25)

# x position of each bar: one evenly spaced point per bin, starting at
# the lower limit of the first bin
x = res.lowerlimit + np.linspace(0, res.binsize*res.cumcount.size,
                                 res.cumcount.size)

# specifying figure size
fig = plt.figure(figsize=(10, 4))

# left sub plot: plain histogram
ax1 = fig.add_subplot(1, 2, 1)

# right sub plot: cumulative histogram
ax2 = fig.add_subplot(1, 2, 2)

# getting histogram using hist function
ax1.hist(samples, bins=25,
         color="green")

# setting up the title
ax1.set_title('Histogram')

# cumulative graph; each bar is exactly one bin wide, so neighbouring
# bars touch instead of overlapping
ax2.bar(x, res.cumcount, width=res.binsize, color="blue")

# setting up the title
ax2.set_title('Cumulative histogram')

# limit the x axis to the range covered by the bars
ax2.set_xlim([x.min(), x.max()])

# display the figure(histogram)
plt.show()


Output:

Example 2:

Python3




# Example 2: histogram and cumulative histogram of a normally distributed
# sample, drawn side by side.

# importing pyplot for getting graph (needed for plt.figure / plt.show)
import matplotlib.pyplot as plt

# importing numpy for getting array
import numpy as np

# importing scientific python
from scipy import stats

# seeded random generator, so every run draws the same sample
rng = np.random.RandomState(seed=12345)

# drawing 1000 samples from a normal distribution
samples = stats.norm.rvs(size=1000,
                         random_state=rng)

# cumulative frequency of the samples over 25 equal-width bins;
# the result carries cumcount, lowerlimit and binsize
res = stats.cumfreq(samples,
                    numbins=25)

# x position of each bar: one evenly spaced point per bin, starting at
# the lower limit of the first bin
x = res.lowerlimit + np.linspace(0, res.binsize*res.cumcount.size,
                                 res.cumcount.size)

# one figure holding both plots next to each other
fig = plt.figure(figsize=(10, 4))

ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# left: plain histogram of the samples
ax1.hist(samples, bins=25, color="green")
ax1.set_title('Histogram')

# right: cumulative histogram; bar heights are the cumulative counts
# (not the x positions) and each bar is exactly one bin wide
ax2.bar(x, res.cumcount, width=res.binsize, color="blue")
ax2.set_title('Cumulative histogram')

# limit the x axis to the range covered by the bars
ax2.set_xlim([x.min(), x.max()])

# display the figure
plt.show()


Output:

RELATED ARTICLES

Most Popular

Recent Comments