For something to be considered a ‘signal’, as opposed to just noise, there has to be some redundancy in the data, some relationship between samples which makes one predictable from another.
This is the principle behind compression:
Find the smallest number of data points that capture as much of the signal variation as possible.
The approach most people are shown first is the Fourier Transform, whereby signals are expressed as a sum of sinusoids (Figure 0.3).

However, this is by no means the only, nor the most efficient, way to compress a signal.
For example, if I want to remind you to bring your raincoat tomorrow, it's safe to assume that simply sending you a text-based message is going to use far less data than making a voice recording and sending a Fourier-compressed copy to you.
What ultimately determines the most efficient compression basis is the kind of signal you're trying to compress, and the way that the information you care about is contained in that signal. (This is, in effect, the 'No Free Lunch Theorem' at work.)
Choosing the right basis, however, is a non-trivial task.
A mathematical tool which has popped up a few times during my searches this past year is the Wavelet Transform: a tool that generalises the process of signal decomposition, allowing us to plug in different bases depending on our needs. These pluggable bases are known as 'Wavelets' – some of which are shown in Figure 0.2.
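(To get a feel for just how many of these bases exist, the PyWavelets library – which I'll be using later in this post – can list its built-in families and their members; the snippet below is purely illustrative.)

```python
import pywt

# The wavelet families that ship with PyWavelets,
# e.g. 'haar', 'db' (Daubechies), 'sym' (Symlets), 'coif' (Coiflets), ...
print(pywt.families(short=True))

# Each family contains many individual wavelets, e.g. the Daubechies series:
print(pywt.wavelist('db'))
```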

However, all of this variety makes one wonder: how should one choose a wavelet basis?
That’s what I’ll be exploring in this post.
I’ll start by describing – in a very coarse manner – what the wavelet transform does, and then I’ll run a simple experiment to see how different wavelet families compare head-to-head on a few different image categories.
Part 1: A quick overview of the wavelet transform ¶
While early forms of the wavelet transform concept date back to the start of the 1900s, the modern notion of it as a standardised, textbook-ready tool is attributed to the work of Morlet and Grossmann in the late 1970s and early 1980s (Graps, 1995) [1], (Daubechies, 1996) [2].
Morlet was a geophysicist looking to perform signal analysis on seismic data, which contained a fractal-like hierarchy of short-lived vibrations. Using the primary tool available at the time – the Short-Time Fourier Transform – you would deal with this kind of data by sliding a windowed waveform across the signal and recording the similarity at each position, thus giving you a sense of the signal's content in both time and frequency.

Whilst partially effective, this approach fails to acknowledge the different scales of the signal.
If the signal really is fractal-like, then zooming into it should reveal a recursively similar level of detail. In other words, our window size should grow and shrink in a dual manner to the frequency scale we're observing.
(As a childish example, think of a scientist with a magnifying glass being shrunk down. The tiny scientist may find that their tiny magnifying glass can be used to study a pebble just as effectively as it was used to study a boulder back when the scientist was bigger.)
This is the essence of what Morlet created: a recursive system for decomposing signals, the Wavelet Transform.
Like the Short-Time Fourier Transform, there would be a windowed mask – known as the Wavelet – that slides over the data looking for pattern similarity; unlike the STFT, however, these windows would vary in size to match the fractal-like changes in scale.
The fractal/recursive nature of things is handled in the algorithm by splitting the signal into multiple resolutions. The original signal is considered to be the maximum resolution of the data, and by passing over that data with a wavelet tuned to that resolution we can match and extract that high-resolution detail.
With the detail removed, what’s left over will be an approximation to the original signal at the next lowest resolution, and the whole process can start again.
A more detailed description of the theory behind this process can be found in (Mallat, 1989) [3]. We can view this recursive procedure as a lopsided pyramid (Figure 1.2).

If we were to repeat this process an infinite number of times we would eventually whittle away at our data until there was nothing left but a steady-state signal, meaning that what has been removed from the signal is a tower of fractally-scaled wavelets. This whittling can be seen in Figure 1.3, in which the ‘remainder’ of the image is shown at successive steps of a five-level pyramid.
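To make that concrete, here's a minimal sketch of the idea using the PyWavelets library (introduced properly in Part 3) on a stand-in array; the image and wavelet choice here are purely illustrative.

```python
import numpy as np
import pywt

image = np.random.rand(256, 256)  # stand-in for a real greyscale image

# Five-level decomposition: coeffs[0] is the coarsest approximation (the
# 'steady-state' remainder), while coeffs[1:] hold the detail coefficients,
# ordered from coarsest to finest.
coeffs = pywt.wavedec2(image, 'haar', level=5)

# Restoring one level of detail at a time mirrors the successive
# 'remainders' shown in Figure 1.3.
for level in range(1, 6):
    partial = pywt.waverec2(coeffs[:level + 1], 'haar')
    print(f'{level} detail level(s) restored -> shape {partial.shape}')
```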

The wavelet transform, then, is ultimately a tool you would expect to excel at:
- identifying nested structures of signal information
- identifying locally concentrated signal information
- removing detail from signals
These are qualities that motivate a lot of interest in the use of wavelet transforms for image compression and analysis, as images tend to contain subjects at different distances from the observer, or subjects born out of natural processes that have self-similarity at different scales.
Part 2: Overview of the experiment ¶
I finished the last section by noting how the wavelet transform has garnered interest in the area of image compression. However, in this post, I’m not so much interested in seeing how well the wavelet transform compares against other techniques, but rather how different wavelet shapes compete once we’ve chosen a specific subject.
As mentioned already, there is a huge range of wavelet shapes to choose from, so I have no expectation of conducting an exhaustive trial. What I'm more interested in doing is establishing a basic process for testing the differences and getting a small sense of how those differences manifest on a few different types of image.
More concretely, we will have the following variables to deal with:
- Wavelet family (this list matches the wavelets shown in Figure 0.2):
  - Haar
  - Daubechies
  - Symlets
  - Coiflets
  - Biorthogonal
- Compression level
  As demonstrated in Figure 1.3, we can compress an image by removing successive levels of detail. The wavelet transform will have five levels and we will measure results at four levels of compression:
  - 1
  - 2
  - 3
  - 4
- Image category
  I have prepared a small batch of images from four categories:
  - Green Cauliflowers
  - Penrose Tilings
  - Water Caustics
  - City Skylines
In order to form any hypotheses using these variables we will need a statistic that can be meaningfully compared.
The statistic I’ve decided on is the Total Wavelet Compression Entropy – or $H_w$. This statistic is not anything widely used, but rather something I’ve constructed myself for the purpose of this experiment. It is computed as follows:
\begin{equation} H_w = (1 - \lambda)H_c + \lambda H_r \tag{1} \label{eq:1} \end{equation}

where:
- $H_c$ is the ratio of the number of bytes required to save the values of the transformed image to the number of bytes in the total image before transformation – the subscript $c$ standing for compression . So it is a measure, per pixel of the image, of the information in wavelet-space.
- $H_r$ is the Kullback-Leibler divergence between the original image and the reconstructed image – the subscript $r$ standing for reconstruction . It is a measure, per pixel of the image, of the information needed to describe the difference between the original image and the lower resolution approximation.
- $\lambda$ is an arbitrary ratio that captures our preference for maximising reconstruction quality over compression size. For this experiment it will be fixed at $0.8$.
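(As a quick worked example with made-up numbers: if the gzipped coefficients came to 30% of the raw image size, $H_c = 0.30$, and the reconstruction divergence were $H_r = 0.05$, then $H_w = 0.2 \times 0.30 + 0.8 \times 0.05 = 0.10$.)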
With all variables and scoring statistics now defined, I can state my hypothesis in more objective terms: that the choice of wavelet family has a significant effect on $H_w$.
This means that we’re looking to run some kind of ANOVA study with wavelet family as the primary independent variable and $H_w$ as the dependent variable. Ideally, I would treat this as a Two-Way ANOVA, testing both image category and image family as primary variables, however there are way too many image categories out there for me to run a meaningful study. Instead, I’ll just repeat the experiment once for each category as more of an exploratory comparison.
Within each image category I’ll also repeat the experiment for each compression-level, however this too will not be a primary variable in the ANOVA.
Thus, the experiment will be run as follows:
- For a given image category (x4):
  - A wavelet family will be selected (x6)
  - A compression level will be selected (x4)
  - A measurement will be made (x10) in which:
    - An image will be drawn from the given category using bootstrap sampling.
    - The image will be transformed using the selected wavelet family and compression level.
    - The transformed image will be byte-encoded using GZip, and the resulting blob will be used to calculate $H_c$.
    - The compressed image will be reconstructed and compared with the original to get $H_r$.
    - $H_c$ and $H_r$ will be used to calculate $H_w$.

With our experiment now defined we can write some code and compute some results.
Part 3: The Code ¶
First of all, we’ll need to set up the environment, and import what we need:
```python
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Personal utils to remove cognitive clutter.
# (See 'https://github.com/teaochablog/comparing-wavelets-for-image-compression/utils')
import blogutils

blogutils.init_theme(sns)
```
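(For completeness: the only two helpers used from `blogutils` in this post are a plotting-theme initialiser and a KL-divergence function. If you'd rather not clone the repo, rough stand-ins – my simplified sketches, not the actual module – might look like this:)

```python
import numpy as np

def init_theme(sns):
    '''Rough stand-in: apply a consistent seaborn plotting theme.'''
    sns.set_theme(style='darkgrid')

def kl(p, q, eps=1e-12):
    '''Rough stand-in: KL divergence between two arrays that have been
    normalised to sum to one (treated as histograms).'''
    p = np.asarray(p, dtype=np.float64).ravel() + eps
    q = np.asarray(q, dtype=np.float64).ravel() + eps
    return float(np.sum(p * np.log(p / q)))
```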
With our setup done we can now define an estimator, which will compute wavelet decompositions using the PyWavelets[4] library.
The public methods on estimators usually accept a sample (X value) and a target (Y value), with the idea that you’re trying to predict Y from X. In this case though the estimator is acting as an encoder, and so the X value and the Y value will be the same – i.e. the original image is the data we estimate from and also what we measure our predictions against.
```python
import pywt
import gzip
import json
from PIL import Image
from sklearn.base import BaseEstimator, ClassifierMixin

# The following two helper methods are what we'll use to turn
# the wavelet coefficients into a compressed bytestring:

def compress_array(arr, round_to: int=3):
    '''Stringifies a numpy array and then zips it into a bytestring.
    '''
    rounded = np.around(arr, round_to)
    as_string = json.dumps(rounded.tolist())
    compressed = gzip.compress(str.encode(as_string))
    return compressed

def compress_wavelet_coeffs(coeffs):
    '''Takes a list of wavelet coefficients and compresses
    it into a bytestring.
    '''
    cfs_approx = coeffs[0]
    cfs_detail = coeffs[1:]
    compressed = b''
    compressed += compress_array(cfs_approx)
    for cfs_d in cfs_detail:
        for detail_direction in cfs_d:
            compressed += compress_array(detail_direction)
    return compressed
```
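As a quick, purely illustrative sanity check on this encoding scheme: coefficient arrays that are mostly zeros should gzip down to far fewer bytes than dense ones, which is what will make discarding resolution levels pay off.

```python
# Hypothetical sanity check: zeroed-out coefficient arrays compress far
# better under the gzip scheme above than dense, noisy ones.
rng = np.random.default_rng(0)
print(len(compress_array(rng.normal(size=(64, 64)))))  # thousands of bytes
print(len(compress_array(np.zeros((64, 64)))))         # a tiny fraction of that
```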
```python
# The following class is the workhorse of the experiment:
class WaveletEstimator(BaseEstimator, ClassifierMixin):

    def __init__(
        self, wavelet='bior1.3',
        decode_levels=5, remove_levels=0,
        scoring_ratio=0.5
    ):
        self.wavelet = wavelet
        self.decode_levels = decode_levels
        self.remove_levels = remove_levels
        self.scoring_ratio = scoring_ratio
        # Values that will be computed as the
        # estimator runs.
        self.coeffs = None
        self.reconstruction = None
        self.total_score = None
        self.compression_score = None
        self.reconstruction_score = None

    def fit(self, X, y=None):
        # The fit method computes the wavelet coefficients,
        # effectively encoding the image.
        X_range_01 = X * 1.0 / 255
        self.coeffs = pywt.wavedec2(
            X_range_01,
            self.wavelet,
            level=self.decode_levels)
        return self

    def predict(self, X, y=None):
        # The predict method takes the previously encoded data (from the
        # 'fit' method) and reconstructs the image.
        # We remove levels of resolution by multiplying the coefficients
        # at those levels by zero.
        # (Note: coeffs_lowres shares storage with self.coeffs, so the
        # compression score computed in 'score' reflects the zeroed levels.)
        coeffs_lowres = self.coeffs
        for l in range(self.remove_levels):
            coeffs_lowres[-(l + 1)] = [
                np.zeros_like(coeffs_lowres[-(l + 1)][0])
                for ll in range(len(coeffs_lowres[-(l + 1)]))
            ]
        self.reconstruction = (
            pywt.waverec2(coeffs_lowres, self.wavelet)
        ) * 255
        # Wavelet transform resolutions reduce by powers of two, so images
        # with odd dimensions can sometimes be off by a pixel when
        # reconstructed. This corrects that. (The cast to float32 is needed
        # because PIL cannot construct an image from a float64 array.)
        if self.reconstruction.shape != X.shape:
            self.reconstruction = np.array(
                Image.fromarray(
                    self.reconstruction.astype(np.float32)
                ).resize(
                    (X.shape[1], X.shape[0])
                )
            )
        self.reconstruction = self.reconstruction.astype(np.uint8)
        return self.reconstruction

    def score(self, X, y=None):
        # The score is the Total Wavelet Compression Entropy.
        # For an 8-bit greyscale image, height * width is the number of
        # bytes in the uncompressed image.
        img_sz = y.shape[0] * y.shape[1]
        # The compression score is the ratio of bytes required to store
        # the wavelet coefficients vs the number of bytes in the
        # uncompressed image.
        self.compression_score = (
            len(compress_wavelet_coeffs(self.coeffs)) / img_sz
        )
        # The reconstruction score is the KL divergence between
        # the original image and the reconstruction.
        # To compute this KL divergence we first normalise the images
        # by treating them as histograms.
        Y_dist = y * 1.0 / np.sum(y)
        R_dist = self.reconstruction * 1.0 / np.sum(self.reconstruction)
        self.reconstruction_score = blogutils.kl(Y_dist, R_dist)
        # The final score is a value (typically between 0.0 and 1.0)
        # expressing the cost per pixel.
        self.total_score = (
            ((1.0 - self.scoring_ratio) * self.compression_score) +
            (self.scoring_ratio * self.reconstruction_score)
        )
        return self.total_score
```
With our estimator defined, all we need now is a method to run an experiment given a particular category of images:
```python
import pandas as pd

WAVELETS = ['haar', 'db2', 'sym2', 'coif1', 'bior1.3', 'rbio1.3']

def compare_wavelets(
        sample_generator, wavelets=WAVELETS, samples_per_wavelet=10,
        decode_levels=5, remove_levels=[1, 2, 3, 4], scoring_ratio=0.8):
    df_columns = [
        'wavelet_fam', 'decode_levels',
        'remove_levels', 'total_score',
        'compression_score', 'reconstruction_score',
    ]
    df_rows = []
    for wavelet in wavelets:
        for s in range(samples_per_wavelet):
            for r in remove_levels:
                sample = np.array(next(sample_generator).convert('L'))
                est = WaveletEstimator(
                    wavelet=wavelet, decode_levels=decode_levels,
                    remove_levels=r, scoring_ratio=scoring_ratio)
                est.fit(sample, sample)
                est.predict(sample, sample)
                est.score(sample, sample)
                df_rows.append([
                    wavelet, decode_levels, r,
                    est.total_score, est.compression_score,
                    est.reconstruction_score,
                ])
    return pd.DataFrame(data=df_rows, columns=df_columns)
```
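The image-loading generator itself isn't shown in this post; a minimal bootstrap sampler (my own sketch, with a made-up folder layout) could look something like this:

```python
import glob
import random
from PIL import Image

def image_sampler(folder):
    '''Yields PIL images drawn from a folder with replacement
    (i.e. bootstrap sampling).'''
    paths = glob.glob(f'{folder}/*.jpg')
    while True:
        yield Image.open(random.choice(paths))

# e.g. for one category:
# df_cauliflower = compare_wavelets(image_sampler('images/cauliflowers'))
```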
Part 4: Results ¶
Now we're ready to run our test on each category of image. For each image category I'll show:
- A small sample of images
- The compression score results
- An ANOVA table
A small discussion will follow at the end.
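The ANOVA tables below look like the output of statsmodels' `anova_lm`; the exact call isn't shown in this post, but a sketch of how such a table could be computed from the dataframe returned by `compare_wavelets` would be:

```python
# Sketch only: assumes the statsmodels formula API and the column names
# produced by compare_wavelets ('total_score', 'wavelet_fam').
import statsmodels.api as sm
import statsmodels.formula.api as smf

def wavelet_anova(df):
    '''One-way ANOVA of the total score against wavelet family.'''
    model = smf.ols('total_score ~ C(wavelet_fam)', data=df).fit()
    return sm.stats.anova_lm(model, typ=2)

# e.g. wavelet_anova(df_cauliflower)
```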
Cauliflowers ¶
Green cauliflowers were chosen due to their fractal-like shape – mirroring the philosophy behind wavelets.


| | sum_sq | df | F | PR(>F) |
|---|---|---|---|---|
| C(wavelet_fam) | 0.000252 | 5.0 | 0.083578 | 0.994755 |
| Residual | 0.141222 | 234.0 | NaN | NaN |
Penrose Tilings ¶
Penrose tilings were chosen as they have a large degree of regularity, and most of the information in them is localised around tile edges.


| | sum_sq | df | F | PR(>F) |
|---|---|---|---|---|
| C(wavelet_fam) | 0.027897 | 5.0 | 7.152142 | 0.000003 |
| Residual | 0.182547 | 234.0 | NaN | NaN |
Caustics ¶
Water caustics were chosen as, like the Penrose tilings, most of the information is concentrated around ‘edges’. However, unlike the tilings, they have a much more organic structure, offering a nice comparison.


| | sum_sq | df | F | PR(>F) |
|---|---|---|---|---|
| C(wavelet_fam) | 0.000252 | 5.0 | 0.083578 | 0.994755 |
| Residual | 0.141222 | 234.0 | NaN | NaN |
Skylines ¶
City skylines were chosen because they have a fractal-like composition but, more importantly, they have an asymmetry along the vertical axis – which is a quality the other image categories do not have.


| | sum_sq | df | F | PR(>F) |
|---|---|---|---|---|
| C(wavelet_fam) | 0.000491 | 5.0 | 0.15502 | 0.978368 |
| Residual | 0.148372 | 234.0 | NaN | NaN |
Part 5: Analysis ¶
Interestingly, in the end, it was only the Penrose tilings that showed a significant variation between wavelet families:
| Category | p-value |
|---|---|
| Cauliflowers | 0.994755 |
| Penrose Tilings | 0.000003 |
| Caustics | 0.994755 |
| Skylines | 0.978368 |
Looking at the graph for the tilings, we see that this variation was primarily in the reconstruction score – though the compression score did appear to show some small degree of variation too.
Even though the p-values for the other image categories were nowhere near significant, the graphs do indicate some interplay between image category, wavelet family, and resolution. However, it is not clear to me how to tease this out in a meaningful experiment.
I imagine that a better way to make decisions on this would be to have a large number of examples of your data up-front, and run some kind of cross-validation selection based on your own tuned scoring criteria.
However, the fact that Penrose tilings showed the most significant variation suggests that images with high levels of regularity may benefit from a decomposition of this form, and that further experiments could identify the exact reason for the significance.
Another way we could improve this experiment would be to base our scoring criteria on 'energy'. At the moment we compress the image by slicing off whole layers of resolution; instead, we could iterate over the coefficients, selecting those that account for the most information first until we hit some stopping criterion, and then form a statistic based on how soon we stop.
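A crude version of that idea – a sketch of my own, not something tested in this post – could keep only the largest-magnitude coefficients and note what fraction was needed:

```python
def keep_top_energy(coeffs, wavelet, keep_fraction=0.05):
    '''Sketch: zero out all but the largest-magnitude fraction of the
    wavelet coefficients, then reconstruct the image from what remains.'''
    arr, slices = pywt.coeffs_to_array(coeffs)
    threshold = np.quantile(np.abs(arr), 1.0 - keep_fraction)
    arr = np.where(np.abs(arr) >= threshold, arr, 0.0)
    kept = pywt.array_to_coeffs(arr, slices, output_format='wavedec2')
    return pywt.waverec2(kept, wavelet)
```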
Conclusion ¶
We can conclude from this simple experiment that the exact time and place to use wavelets isn’t absolutely clear – at least not for image compression.
Looking around at other papers, such as (Oliveira et al., 2019)[5] which details the use of wavelets in power-grid signal analysis, there are a great number of recent studies which claim to find benefit in their use. So perhaps the value lies in more esoteric examples. (I suppose that even within this experiment we did at least see one area of benefit, which indicates that the tool could be valuable for specific signal types).
That said though, from this investigation, I still could not point to a signal and have a strong intuition as to whether wavelets would provide a good compression mechanism.
There’s much more investigating to be done.
Bibliography ¶
- Graps, A. (1995). An introduction to wavelets. IEEE Computational Science and Engineering, 2, 50–61.
- Daubechies, I. (1996). Where do wavelets come from? A personal point of view. Proceedings of the IEEE, 84(4), 510–513. https://doi.org/10.1109/5.488696
- Mallat, S. G. (1989). A theory for multiresolution signal decomposition: The wavelet representation. IEEE Transactions on Pattern Analysis and Machine Intelligence, 11(7), 674–693. https://doi.org/10.1109/34.192463
- Lee, G. R., Gommers, R., Wasilewski, F., Wohlfahrt, K., & O’Leary, A. (2019). PyWavelets: A Python package for wavelet analysis. Journal of Open Source Software, 4(36), 1237. https://doi.org/10.21105/joss.01237
- Oliveira, M. O., Reversat, J. H., & Reynoso, L. A. (2019). Wavelet Transform Analysis to Applications in Electric Power Systems. In Wavelet Transform and Complexity. IntechOpen. https://doi.org/10.5772/intechopen.85274
Image Attributions ¶
- Caustics, City Skylines & Fractal Cauliflowers:
  - All courtesy of Unsplash
  - Specific photographers: Steven Lasry, Daniele Levis Pelusi, Jean-Michel GARCIA, Patrick Perkins