dev/modules/covariance.html


<!DOCTYPE html>


<html lang="en" data-content_root="../" >

  <head>
    <meta charset="utf-8" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" /><meta name="viewport" content="width=device-width, initial-scale=1" />
<meta property="og:title" content="2.6. Covariance estimation" />
<meta property="og:type" content="website" />
<meta property="og:url" content="https://scikit-learn/stable/modules/covariance.html" />
<meta property="og:site_name" content="scikit-learn" />
<meta property="og:description" content="Many statistical problems require the estimation of a population’s covariance matrix, which can be seen as an estimation of data set scatter plot shape. Most of the time, such an estimation has to ..." />
<meta property="og:image" content="https://scikit-learn/stable/_images/sphx_glr_plot_covariance_estimation_001.png" />
<meta property="og:image:alt" content="scikit-learn" />
<meta name="description" content="Many statistical problems require the estimation of a population’s covariance matrix, which can be seen as an estimation of data set scatter plot shape. Most of the time, such an estimation has to ..." />

    <title>2.6. Covariance estimation &#8212; scikit-learn 1.7.dev0 documentation</title>
  
  
  <script data-cfasync="false">
    document.documentElement.dataset.mode = localStorage.getItem("mode") || "";
    document.documentElement.dataset.theme = localStorage.getItem("theme") || "";
  </script>
  <!--
    this give us a css class that will be invisible only if js is disabled
  -->
  <noscript>
    <style>
      .pst-js-only { display: none !important; }

    </style>
  </noscript>
  
  <!-- Loaded before other Sphinx assets -->
  <link href="../_static/styles/theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />
<link href="../_static/styles/pydata-sphinx-theme.css?digest=8878045cc6db502f8baf" rel="stylesheet" />

    <link rel="stylesheet" type="text/css" href="../_static/pygments.css?v=8f2a1f02" />
    <link rel="stylesheet" type="text/css" href="../_static/copybutton.css?v=76b2166b" />
    <link rel="stylesheet" type="text/css" href="../_static/plot_directive.css" />
    <link rel="stylesheet" type="text/css" href="https://fonts.googleapis.com/css?family=Vibur" />
    <link rel="stylesheet" type="text/css" href="../_static/jupyterlite_sphinx.css?v=2c9f8f05" />
    <link rel="stylesheet" type="text/css" href="../_static/sg_gallery.css?v=d2d258e8" />
    <link rel="stylesheet" type="text/css" href="../_static/sg_gallery-binder.css?v=f4aeca0c" />
    <link rel="stylesheet" type="text/css" href="../_static/sg_gallery-dataframe.css?v=2082cf3c" />
    <link rel="stylesheet" type="text/css" href="../_static/sg_gallery-rendered-html.css?v=1277b6f3" />
    <link rel="stylesheet" type="text/css" href="../_static/sphinx-design.min.css?v=95c83b7e" />
    <link rel="stylesheet" type="text/css" href="../_static/styles/colors.css?v=cc94ab7d" />
    <link rel="stylesheet" type="text/css" href="../_static/styles/custom.css?v=8f525996" />
  
  <!-- So that users can add custom icons -->
  <script src="../_static/scripts/fontawesome.js?digest=8878045cc6db502f8baf"></script>
  <!-- Pre-loaded scripts that we'll load fully later -->
  <link rel="preload" as="script" href="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf" />
<link rel="preload" as="script" href="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf" />

    <script src="../_static/documentation_options.js?v=473747f4"></script>
    <script src="../_static/doctools.js?v=9bcbadda"></script>
    <script src="../_static/sphinx_highlight.js?v=dc90522c"></script>
    <script src="../_static/clipboard.min.js?v=a7894cd8"></script>
    <script src="../_static/copybutton.js?v=97f0b27d"></script>
    <script src="../_static/jupyterlite_sphinx.js?v=96e329c5"></script>
    <script src="../_static/design-tabs.js?v=f930bc37"></script>
    <script data-domain="scikit-learn.org" defer="defer" src="https://views.scientific-python.org/js/script.js"></script>
    <script async="async" src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js"></script>
    <script>DOCUMENTATION_OPTIONS.pagename = 'modules/covariance';</script>
    <script>
        DOCUMENTATION_OPTIONS.theme_version = '0.16.1';
        DOCUMENTATION_OPTIONS.theme_switcher_json_url = 'https://scikit-learn.org/dev/_static/versions.json';
        DOCUMENTATION_OPTIONS.theme_switcher_version_match = '1.7.dev0';
        DOCUMENTATION_OPTIONS.show_version_warning_banner =
            true;
        </script>
    <script src="../_static/scripts/dropdown.js?v=d6825577"></script>
    <script src="../_static/scripts/version-switcher.js?v=a6dd8357"></script>
    <script src="../_static/scripts/sg_plotly_resize.js?v=2167d4db"></script>
    <link rel="canonical" href="https://scikit-learn.org/stable/modules/covariance.html" />
    <link rel="icon" href="../_static/favicon.ico"/>
    <link rel="author" title="About these documents" href="../about.html" />
    <link rel="search" title="Search" href="../search.html" />
    <link rel="next" title="2.7. Novelty and Outlier Detection" href="outlier_detection.html" />
    <link rel="prev" title="2.5. Decomposing signals in components (matrix factorization problems)" href="decomposition.html" />
  <meta name="viewport" content="width=device-width, initial-scale=1"/>
  <meta name="docsearch:language" content="en"/>
  <meta name="docsearch:version" content="1.7" />
  </head>
  
  
  <body data-bs-spy="scroll" data-bs-target=".bd-toc-nav" data-offset="180" data-bs-root-margin="0px 0px -60%" data-default-mode="">

  
  <div id="pst-skip-link" class="skip-link d-print-none"><a href="#main-content">Skip to main content</a></div>
  
  <div id="pst-scroll-pixel-helper"></div>
  
  <button type="button" class="btn rounded-pill" id="pst-back-to-top">
    <i class="fa-solid fa-arrow-up"></i>Back to top</button>

  
  <dialog id="pst-search-dialog">
    
<form class="bd-search d-flex align-items-center"
      action="../search.html"
      method="get">
  <i class="fa-solid fa-magnifying-glass"></i>
  <input type="search"
         class="form-control"
         name="q"
         placeholder="Search the docs ..."
         aria-label="Search the docs ..."
         autocomplete="off"
         autocorrect="off"
         autocapitalize="off"
         spellcheck="false"/>
  <span class="search-button__kbd-shortcut"><kbd class="kbd-shortcut__modifier">Ctrl</kbd>+<kbd>K</kbd></span>
</form>
  </dialog>

  <div class="pst-async-banner-revealer d-none">
  <aside id="bd-header-version-warning" class="d-none d-print-none" aria-label="Version warning"></aside>
</div>

  
    <header class="bd-header navbar navbar-expand-lg bd-navbar d-print-none">
<div class="bd-header__inner bd-page-width">
  <button class="pst-navbar-icon sidebar-toggle primary-toggle" aria-label="Site navigation">
    <span class="fa-solid fa-bars"></span>
  </button>
  
  
  <div class=" navbar-header-items__start">
    
      <div class="navbar-item">

  
<a class="navbar-brand logo" href="../index.html">
  
  
    <img src="../_static/scikit-learn-logo-small.png" class="logo__image only-light" alt="scikit-learn homepage"/>
    <img src="../_static/scikit-learn-logo-small.png" class="logo__image only-dark pst-js-only" alt="scikit-learn homepage"/>
  
  
</a></div>
    
  </div>
  
  <div class=" navbar-header-items">
    
    <div class="me-auto navbar-header-items__center">
      
        <div class="navbar-item">
<nav>
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item ">
  <a class="nav-link nav-internal" href="../install.html">
    Install
  </a>
</li>


<li class="nav-item current active">
  <a class="nav-link nav-internal" href="../user_guide.html">
    User Guide
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../api/index.html">
    API
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../auto_examples/index.html">
    Examples
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-external" href="https://blog.scikit-learn.org/">
    Community
  </a>
</li>

            <li class="nav-item dropdown">
                <button class="btn dropdown-toggle nav-item" type="button"
                data-bs-toggle="dropdown" aria-expanded="false"
                aria-controls="pst-nav-more-links">
                    More
                </button>
                <ul id="pst-nav-more-links" class="dropdown-menu">
                    
<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../getting_started.html">
    Getting Started
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../whats_new.html">
    Release History
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../glossary.html">
    Glossary
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../developers/index.html">
    Development
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../faq.html">
    FAQ
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../support.html">
    Support
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../related_projects.html">
    Related Projects
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../roadmap.html">
    Roadmap
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../governance.html">
    Governance
  </a>
</li>


<li class=" ">
  <a class="nav-link dropdown-item nav-internal" href="../about.html">
    About us
  </a>
</li>

                </ul>
            </li>
            
  </ul>
</nav></div>
      
    </div>
    
    
    <div class="navbar-header-items__end">
      
        <div class="navbar-item navbar-persistent--container">
          

<button class="btn btn-sm pst-navbar-icon search-button search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
</button>
        </div>
      
      
        <div class="navbar-item">

<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode"  data-bs-placement="bottom" data-bs-toggle="tooltip">
  <i class="theme-switch fa-solid fa-sun                fa-lg" data-mode="light" title="Light"></i>
  <i class="theme-switch fa-solid fa-moon               fa-lg" data-mode="dark"  title="Dark"></i>
  <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"  title="System Settings"></i>
</button></div>
      
        <div class="navbar-item"><ul class="navbar-icon-links"
    aria-label="Icon Links">
        <li class="nav-item">
          
          
          <a href="https://github.com/scikit-learn/scikit-learn" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
            <span class="sr-only">GitHub</span></a>
        </li>
</ul></div>
      
        <div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
  <button id="pst-version-switcher-button-2"
    type="button"
    class="version-switcher__button btn btn-sm dropdown-toggle"
    data-bs-toggle="dropdown"
    aria-haspopup="listbox"
    aria-controls="pst-version-switcher-list-2"
    aria-label="Version switcher list"
  >
    Choose version  <!-- this text may get changed later by javascript -->
    <span class="caret"></span>
  </button>
  <div id="pst-version-switcher-list-2"
    class="version-switcher__menu dropdown-menu list-group-flush py-0"
    role="listbox" aria-labelledby="pst-version-switcher-button-2">
    <!-- dropdown will be populated by javascript on page load -->
  </div>
</div></div>
      
    </div>
    
  </div>
  
  
    <div class="navbar-persistent--mobile">

<button class="btn btn-sm pst-navbar-icon search-button search-button__button pst-js-only" title="Search" aria-label="Search" data-bs-placement="bottom" data-bs-toggle="tooltip">
    <i class="fa-solid fa-magnifying-glass fa-lg"></i>
</button>
    </div>
  

    <button class="pst-navbar-icon sidebar-toggle secondary-toggle" aria-label="On this page">
      <span class="fa-solid fa-outdent"></span>
    </button>
  
</div>

    </header>
  

  <div class="bd-container">
    <div class="bd-container__inner bd-page-width">
      
      
      <dialog id="pst-primary-sidebar-modal"></dialog>
      <div id="pst-primary-sidebar" class="bd-sidebar-primary bd-sidebar">
        

  <div class="sidebar-header-items sidebar-primary__section">
    
    
      <div class="sidebar-header-items__center">
        
          
            <div class="navbar-item">
<nav>
  <ul class="bd-navbar-elements navbar-nav">
    
<li class="nav-item ">
  <a class="nav-link nav-internal" href="../install.html">
    Install
  </a>
</li>


<li class="nav-item current active">
  <a class="nav-link nav-internal" href="../user_guide.html">
    User Guide
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../api/index.html">
    API
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../auto_examples/index.html">
    Examples
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-external" href="https://blog.scikit-learn.org/">
    Community
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../getting_started.html">
    Getting Started
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../whats_new.html">
    Release History
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../glossary.html">
    Glossary
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../developers/index.html">
    Development
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../faq.html">
    FAQ
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../support.html">
    Support
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../related_projects.html">
    Related Projects
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../roadmap.html">
    Roadmap
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../governance.html">
    Governance
  </a>
</li>


<li class="nav-item ">
  <a class="nav-link nav-internal" href="../about.html">
    About us
  </a>
</li>

  </ul>
</nav></div>
          
        
      </div>
    
    
      <div class="sidebar-header-items__end">
        
          <div class="navbar-item">

<button class="btn btn-sm nav-link pst-navbar-icon theme-switch-button pst-js-only" aria-label="Color mode" data-bs-title="Color mode"  data-bs-placement="bottom" data-bs-toggle="tooltip">
  <i class="theme-switch fa-solid fa-sun                fa-lg" data-mode="light" title="Light"></i>
  <i class="theme-switch fa-solid fa-moon               fa-lg" data-mode="dark"  title="Dark"></i>
  <i class="theme-switch fa-solid fa-circle-half-stroke fa-lg" data-mode="auto"  title="System Settings"></i>
</button></div>
        
          <div class="navbar-item"><ul class="navbar-icon-links"
    aria-label="Icon Links">
        <li class="nav-item">
          
          
          <a href="https://github.com/scikit-learn/scikit-learn" title="GitHub" class="nav-link pst-navbar-icon" rel="noopener" target="_blank" data-bs-toggle="tooltip" data-bs-placement="bottom"><i class="fa-brands fa-square-github fa-lg" aria-hidden="true"></i>
            <span class="sr-only">GitHub</span></a>
        </li>
</ul></div>
        
          <div class="navbar-item">
<div class="version-switcher__container dropdown pst-js-only">
  <button id="pst-version-switcher-button-3"
    type="button"
    class="version-switcher__button btn btn-sm dropdown-toggle"
    data-bs-toggle="dropdown"
    aria-haspopup="listbox"
    aria-controls="pst-version-switcher-list-3"
    aria-label="Version switcher list"
  >
    Choose version  <!-- this text may get changed later by javascript -->
    <span class="caret"></span>
  </button>
  <div id="pst-version-switcher-list-3"
    class="version-switcher__menu dropdown-menu list-group-flush py-0"
    role="listbox" aria-labelledby="pst-version-switcher-button-3">
    <!-- dropdown will be populated by javascript on page load -->
  </div>
</div></div>
        
      </div>
    
  </div>
  
    <div class="sidebar-primary-items__start sidebar-primary__section">
        <div class="sidebar-primary-item">
<nav class="bd-docs-nav bd-links"
     aria-label="Section Navigation">
  <p class="bd-links__title" role="heading" aria-level="1">Section Navigation</p>
  <div class="bd-toc-item navbar-nav"><ul class="current nav bd-sidenav">
<li class="toctree-l1 has-children"><a class="reference internal" href="../supervised_learning.html">1. Supervised learning</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="linear_model.html">1.1. Linear Models</a></li>
<li class="toctree-l2"><a class="reference internal" href="lda_qda.html">1.2. Linear and Quadratic Discriminant Analysis</a></li>
<li class="toctree-l2"><a class="reference internal" href="kernel_ridge.html">1.3. Kernel ridge regression</a></li>
<li class="toctree-l2"><a class="reference internal" href="svm.html">1.4. Support Vector Machines</a></li>
<li class="toctree-l2"><a class="reference internal" href="sgd.html">1.5. Stochastic Gradient Descent</a></li>
<li class="toctree-l2"><a class="reference internal" href="neighbors.html">1.6. Nearest Neighbors</a></li>
<li class="toctree-l2"><a class="reference internal" href="gaussian_process.html">1.7. Gaussian Processes</a></li>
<li class="toctree-l2"><a class="reference internal" href="cross_decomposition.html">1.8. Cross decomposition</a></li>
<li class="toctree-l2"><a class="reference internal" href="naive_bayes.html">1.9. Naive Bayes</a></li>
<li class="toctree-l2"><a class="reference internal" href="tree.html">1.10. Decision Trees</a></li>
<li class="toctree-l2"><a class="reference internal" href="ensemble.html">1.11. Ensembles: Gradient boosting, random forests, bagging, voting, stacking</a></li>
<li class="toctree-l2"><a class="reference internal" href="multiclass.html">1.12. Multiclass and multioutput algorithms</a></li>
<li class="toctree-l2"><a class="reference internal" href="feature_selection.html">1.13. Feature selection</a></li>
<li class="toctree-l2"><a class="reference internal" href="semi_supervised.html">1.14. Semi-supervised learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="isotonic.html">1.15. Isotonic regression</a></li>
<li class="toctree-l2"><a class="reference internal" href="calibration.html">1.16. Probability calibration</a></li>
<li class="toctree-l2"><a class="reference internal" href="neural_networks_supervised.html">1.17. Neural network models (supervised)</a></li>
</ul>
</details></li>
<li class="toctree-l1 current active has-children"><a class="reference internal" href="../unsupervised_learning.html">2. Unsupervised learning</a><details open="open"><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="mixture.html">2.1. Gaussian mixture models</a></li>
<li class="toctree-l2"><a class="reference internal" href="manifold.html">2.2. Manifold learning</a></li>
<li class="toctree-l2"><a class="reference internal" href="clustering.html">2.3. Clustering</a></li>
<li class="toctree-l2"><a class="reference internal" href="biclustering.html">2.4. Biclustering</a></li>
<li class="toctree-l2"><a class="reference internal" href="decomposition.html">2.5. Decomposing signals in components (matrix factorization problems)</a></li>
<li class="toctree-l2 current active"><a class="current reference internal" href="#">2.6. Covariance estimation</a></li>
<li class="toctree-l2"><a class="reference internal" href="outlier_detection.html">2.7. Novelty and Outlier Detection</a></li>
<li class="toctree-l2"><a class="reference internal" href="density.html">2.8. Density Estimation</a></li>
<li class="toctree-l2"><a class="reference internal" href="neural_networks_unsupervised.html">2.9. Neural network models (unsupervised)</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../model_selection.html">3. Model selection and evaluation</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="cross_validation.html">3.1. Cross-validation: evaluating estimator performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="grid_search.html">3.2. Tuning the hyper-parameters of an estimator</a></li>
<li class="toctree-l2"><a class="reference internal" href="classification_threshold.html">3.3. Tuning the decision threshold for class prediction</a></li>
<li class="toctree-l2"><a class="reference internal" href="model_evaluation.html">3.4. Metrics and scoring: quantifying the quality of predictions</a></li>
<li class="toctree-l2"><a class="reference internal" href="learning_curve.html">3.5. Validation curves: plotting scores to evaluate models</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../metadata_routing.html">4. Metadata Routing</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../inspection.html">5. Inspection</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="partial_dependence.html">5.1. Partial Dependence and Individual Conditional Expectation plots</a></li>
<li class="toctree-l2"><a class="reference internal" href="permutation_importance.html">5.2. Permutation feature importance</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../visualizations.html">6. Visualizations</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../data_transforms.html">7. Dataset transformations</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="compose.html">7.1. Pipelines and composite estimators</a></li>
<li class="toctree-l2"><a class="reference internal" href="feature_extraction.html">7.2. Feature extraction</a></li>
<li class="toctree-l2"><a class="reference internal" href="preprocessing.html">7.3. Preprocessing data</a></li>
<li class="toctree-l2"><a class="reference internal" href="impute.html">7.4. Imputation of missing values</a></li>
<li class="toctree-l2"><a class="reference internal" href="unsupervised_reduction.html">7.5. Unsupervised dimensionality reduction</a></li>
<li class="toctree-l2"><a class="reference internal" href="random_projection.html">7.6. Random Projection</a></li>
<li class="toctree-l2"><a class="reference internal" href="kernel_approximation.html">7.7. Kernel Approximation</a></li>
<li class="toctree-l2"><a class="reference internal" href="metrics.html">7.8. Pairwise metrics, Affinities and Kernels</a></li>
<li class="toctree-l2"><a class="reference internal" href="preprocessing_targets.html">7.9. Transforming the prediction target (<code class="docutils literal notranslate"><span class="pre">y</span></code>)</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../datasets.html">8. Dataset loading utilities</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../datasets/toy_dataset.html">8.1. Toy datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datasets/real_world.html">8.2. Real world datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datasets/sample_generators.html">8.3. Generated datasets</a></li>
<li class="toctree-l2"><a class="reference internal" href="../datasets/loading_other_datasets.html">8.4. Loading other datasets</a></li>
</ul>
</details></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../computing.html">9. Computing with scikit-learn</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="../computing/scaling_strategies.html">9.1. Strategies to scale computationally: bigger data</a></li>
<li class="toctree-l2"><a class="reference internal" href="../computing/computational_performance.html">9.2. Computational Performance</a></li>
<li class="toctree-l2"><a class="reference internal" href="../computing/parallelism.html">9.3. Parallelism, resource management, and configuration</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../model_persistence.html">10. Model persistence</a></li>
<li class="toctree-l1"><a class="reference internal" href="../common_pitfalls.html">11. Common pitfalls and recommended practices</a></li>
<li class="toctree-l1 has-children"><a class="reference internal" href="../dispatching.html">12. Dispatching</a><details><summary><span class="toctree-toggle" role="presentation"><i class="fa-solid fa-chevron-down"></i></span></summary><ul>
<li class="toctree-l2"><a class="reference internal" href="array_api.html">12.1. Array API support (experimental)</a></li>
</ul>
</details></li>
<li class="toctree-l1"><a class="reference internal" href="../machine_learning_map.html">13. Choosing the right estimator</a></li>
<li class="toctree-l1"><a class="reference internal" href="../presentations.html">14. External Resources, Videos and Talks</a></li>
</ul>
</div>
</nav></div>
    </div>
  
  
  <div class="sidebar-primary-items__end sidebar-primary__section">
  </div>


      </div>
      
      <main id="main-content" class="bd-main" role="main">
        
        
          <div class="bd-content">
            <div class="bd-article-container">
              
              <div class="bd-header-article d-print-none">
<div class="header-article-items header-article__inner">
  
    <div class="header-article-items__start">
      
        <div class="header-article-item">

<nav aria-label="Breadcrumb" class="d-print-none">
  <ul class="bd-breadcrumbs">
    
    <li class="breadcrumb-item breadcrumb-home">
      <a href="../index.html" class="nav-link" aria-label="Home">
        <i class="fa-solid fa-home"></i>
      </a>
    </li>
    
    <li class="breadcrumb-item"><a href="../user_guide.html" class="nav-link">User Guide</a></li>
    
    
    <li class="breadcrumb-item"><a href="../unsupervised_learning.html" class="nav-link"><span class="section-number">2. </span>Unsupervised learning</a></li>
    
    <li class="breadcrumb-item active" aria-current="page"><span class="ellipsis"><span class="section-number">2.6. </span>Covariance estimation</span></li>
  </ul>
</nav>
</div>
      
    </div>
  
  
</div>
</div>
              
              
<div id="searchbox"></div>
                <article class="bd-article">
                  
  <section id="covariance-estimation">
<span id="covariance"></span><h1><span class="section-number">2.6. </span>Covariance estimation<a class="headerlink" href="#covariance-estimation" title="Link to this heading">#</a></h1>
<p>Many statistical problems require the estimation of a
population’s covariance matrix, which can be seen as an estimation of
data set scatter plot shape. Most of the time, such an estimation has
to be done on a sample whose properties (size, structure, homogeneity)
have a large influence on the estimation’s quality. The
<a class="reference internal" href="../api/sklearn.covariance.html#module-sklearn.covariance" title="sklearn.covariance"><code class="xref py py-mod docutils literal notranslate"><span class="pre">sklearn.covariance</span></code></a> package provides tools for accurately estimating
a population’s covariance matrix under various settings.</p>
<p>We assume that the observations are independent and identically
distributed (i.i.d.).</p>
<section id="empirical-covariance">
<h2><span class="section-number">2.6.1. </span>Empirical covariance<a class="headerlink" href="#empirical-covariance" title="Link to this heading">#</a></h2>
<p>The covariance matrix of a data set is known to be well approximated
by the classical <em>maximum likelihood estimator</em> (or “empirical
covariance”), provided the number of observations is large enough
compared to the number of features (the variables describing the
observations). More precisely, the Maximum Likelihood Estimator of a
sample is an asymptotically unbiased estimator of the corresponding
population’s covariance matrix.</p>
<p>The empirical covariance matrix of a sample can be computed using the
<a class="reference internal" href="generated/sklearn.covariance.empirical_covariance.html#sklearn.covariance.empirical_covariance" title="sklearn.covariance.empirical_covariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">empirical_covariance</span></code></a> function of the package, or by fitting an
<a class="reference internal" href="generated/sklearn.covariance.EmpiricalCovariance.html#sklearn.covariance.EmpiricalCovariance" title="sklearn.covariance.EmpiricalCovariance"><code class="xref py py-class docutils literal notranslate"><span class="pre">EmpiricalCovariance</span></code></a> object to the data sample with the
<a class="reference internal" href="generated/sklearn.covariance.EmpiricalCovariance.html#sklearn.covariance.EmpiricalCovariance.fit" title="sklearn.covariance.EmpiricalCovariance.fit"><code class="xref py py-meth docutils literal notranslate"><span class="pre">EmpiricalCovariance.fit</span></code></a> method. Be careful that results depend
on whether the data are centered, so one may want to use the
<code class="docutils literal notranslate"><span class="pre">assume_centered</span></code> parameter accurately. More precisely, if
<code class="docutils literal notranslate"><span class="pre">assume_centered=False</span></code>, then the test set is supposed to have the
same mean vector as the training set. If not, both should be centered
by the user, and <code class="docutils literal notranslate"><span class="pre">assume_centered=True</span></code> should be used.</p>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_covariance_estimation.html#sphx-glr-auto-examples-covariance-plot-covariance-estimation-py"><span class="std std-ref">Shrinkage covariance estimation: LedoitWolf vs OAS and max-likelihood</span></a> for
an example on how to fit an <a class="reference internal" href="generated/sklearn.covariance.EmpiricalCovariance.html#sklearn.covariance.EmpiricalCovariance" title="sklearn.covariance.EmpiricalCovariance"><code class="xref py py-class docutils literal notranslate"><span class="pre">EmpiricalCovariance</span></code></a> object to data.</p></li>
</ul>
</section>
<section id="shrunk-covariance">
<span id="id1"></span><h2><span class="section-number">2.6.2. </span>Shrunk Covariance<a class="headerlink" href="#shrunk-covariance" title="Link to this heading">#</a></h2>
<section id="basic-shrinkage">
<h3><span class="section-number">2.6.2.1. </span>Basic shrinkage<a class="headerlink" href="#basic-shrinkage" title="Link to this heading">#</a></h3>
<p>Despite being an asymptotically unbiased estimator of the covariance matrix,
the Maximum Likelihood Estimator is not a good estimator of the
eigenvalues of the covariance matrix, so the precision matrix obtained
from its inversion is not accurate. Sometimes, it even occurs that the
empirical covariance matrix cannot be inverted for numerical
reasons. To avoid such an inversion problem, a transformation of the
empirical covariance matrix has been introduced: the <code class="docutils literal notranslate"><span class="pre">shrinkage</span></code>.</p>
<p>In scikit-learn, this transformation (with a user-defined shrinkage
coefficient) can be directly applied to a pre-computed covariance with
the <a class="reference internal" href="generated/sklearn.covariance.shrunk_covariance.html#sklearn.covariance.shrunk_covariance" title="sklearn.covariance.shrunk_covariance"><code class="xref py py-func docutils literal notranslate"><span class="pre">shrunk_covariance</span></code></a> method. Also, a shrunk estimator of the
covariance can be fitted to data with a <a class="reference internal" href="generated/sklearn.covariance.ShrunkCovariance.html#sklearn.covariance.ShrunkCovariance" title="sklearn.covariance.ShrunkCovariance"><code class="xref py py-class docutils literal notranslate"><span class="pre">ShrunkCovariance</span></code></a> object
and its <a class="reference internal" href="generated/sklearn.covariance.ShrunkCovariance.html#sklearn.covariance.ShrunkCovariance.fit" title="sklearn.covariance.ShrunkCovariance.fit"><code class="xref py py-meth docutils literal notranslate"><span class="pre">ShrunkCovariance.fit</span></code></a> method. Again, results depend on
whether the data are centered, so one may want to use the
<code class="docutils literal notranslate"><span class="pre">assume_centered</span></code> parameter accurately.</p>
<p>Mathematically, this shrinkage consists in reducing the ratio between the
smallest and the largest eigenvalues of the empirical covariance matrix.
It can be done by simply shifting every eigenvalue according to a given
offset, which is equivalent of finding the l2-penalized Maximum
Likelihood Estimator of the covariance matrix. In practice, shrinkage
boils down to a simple convex transformation : <span class="math notranslate nohighlight">\(\Sigma_{\rm
shrunk} = (1-\alpha)\hat{\Sigma} + \alpha\frac{{\rm
Tr}\hat{\Sigma}}{p}\rm Id\)</span>.</p>
<p>Choosing the amount of shrinkage, <span class="math notranslate nohighlight">\(\alpha\)</span> amounts to setting a
bias/variance trade-off, and is discussed below.</p>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_covariance_estimation.html#sphx-glr-auto-examples-covariance-plot-covariance-estimation-py"><span class="std std-ref">Shrinkage covariance estimation: LedoitWolf vs OAS and max-likelihood</span></a> for
an example on how to fit a <a class="reference internal" href="generated/sklearn.covariance.ShrunkCovariance.html#sklearn.covariance.ShrunkCovariance" title="sklearn.covariance.ShrunkCovariance"><code class="xref py py-class docutils literal notranslate"><span class="pre">ShrunkCovariance</span></code></a> object to data.</p></li>
</ul>
</section>
<section id="ledoit-wolf-shrinkage">
<h3><span class="section-number">2.6.2.2. </span>Ledoit-Wolf shrinkage<a class="headerlink" href="#ledoit-wolf-shrinkage" title="Link to this heading">#</a></h3>
<p>In their 2004 paper <a class="footnote-reference brackets" href="#id3" id="id2" role="doc-noteref"><span class="fn-bracket">[</span>1<span class="fn-bracket">]</span></a>, O. Ledoit and M. Wolf propose a formula
to compute the optimal shrinkage coefficient <span class="math notranslate nohighlight">\(\alpha\)</span> that
minimizes the Mean Squared Error between the estimated and the real
covariance matrix.</p>
<p>The Ledoit-Wolf estimator of the covariance matrix can be computed on
a sample with the <a class="reference internal" href="generated/sklearn.covariance.ledoit_wolf.html#sklearn.covariance.ledoit_wolf" title="sklearn.covariance.ledoit_wolf"><code class="xref py py-meth docutils literal notranslate"><span class="pre">ledoit_wolf</span></code></a> function of the
<a class="reference internal" href="../api/sklearn.covariance.html#module-sklearn.covariance" title="sklearn.covariance"><code class="xref py py-mod docutils literal notranslate"><span class="pre">sklearn.covariance</span></code></a> package, or it can be otherwise obtained by
fitting a <a class="reference internal" href="generated/sklearn.covariance.LedoitWolf.html#sklearn.covariance.LedoitWolf" title="sklearn.covariance.LedoitWolf"><code class="xref py py-class docutils literal notranslate"><span class="pre">LedoitWolf</span></code></a> object to the same sample.</p>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p><strong>Case when population covariance matrix is isotropic</strong></p>
<p>It is important to note that when the number of samples is much larger than
the number of features, one would expect that no shrinkage would be
necessary. The intuition behind this is that if the population covariance
is full rank, when the number of samples grows, the sample covariance will
also become positive definite. As a result, no shrinkage would be necessary
and the method should automatically do this.</p>
<p>This, however, is not the case in the Ledoit-Wolf procedure when the
population covariance happens to be a multiple of the identity matrix. In
this case, the Ledoit-Wolf shrinkage estimate approaches 1 as the number of
samples increases. This indicates that the optimal estimate of the
covariance matrix in the Ledoit-Wolf sense is a multiple of the identity.
Since the population covariance is already a multiple of the identity
matrix, the Ledoit-Wolf solution is indeed a reasonable estimate.</p>
</div>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_covariance_estimation.html#sphx-glr-auto-examples-covariance-plot-covariance-estimation-py"><span class="std std-ref">Shrinkage covariance estimation: LedoitWolf vs OAS and max-likelihood</span></a> for
an example on how to fit a <a class="reference internal" href="generated/sklearn.covariance.LedoitWolf.html#sklearn.covariance.LedoitWolf" title="sklearn.covariance.LedoitWolf"><code class="xref py py-class docutils literal notranslate"><span class="pre">LedoitWolf</span></code></a> object to data and
for visualizing the performances of the Ledoit-Wolf estimator in
terms of likelihood.</p></li>
</ul>
<p class="rubric">References</p>
<aside class="footnote-list brackets">
<aside class="footnote brackets" id="id3" role="doc-footnote">
<span class="label"><span class="fn-bracket">[</span><a role="doc-backlink" href="#id2">1</a><span class="fn-bracket">]</span></span>
<p>O. Ledoit and M. Wolf, “A Well-Conditioned Estimator for Large-Dimensional
Covariance Matrices”, Journal of Multivariate Analysis, Volume 88, Issue 2,
February 2004, pages 365-411.</p>
</aside>
</aside>
</section>
<section id="oracle-approximating-shrinkage">
<span id="id4"></span><h3><span class="section-number">2.6.2.3. </span>Oracle Approximating Shrinkage<a class="headerlink" href="#oracle-approximating-shrinkage" title="Link to this heading">#</a></h3>
<p>Under the assumption that the data are Gaussian distributed, Chen et
al. <a class="footnote-reference brackets" href="#id6" id="id5" role="doc-noteref"><span class="fn-bracket">[</span>2<span class="fn-bracket">]</span></a> derived a formula aimed at choosing a shrinkage coefficient that
yields a smaller Mean Squared Error than the one given by Ledoit and
Wolf’s formula. The resulting estimator is known as the Oracle
Shrinkage Approximating estimator of the covariance.</p>
<p>The OAS estimator of the covariance matrix can be computed on a sample
with the <a class="reference internal" href="generated/oas-function.html#sklearn.covariance.oas" title="sklearn.covariance.oas"><code class="xref py py-meth docutils literal notranslate"><span class="pre">oas</span></code></a> function of the <a class="reference internal" href="../api/sklearn.covariance.html#module-sklearn.covariance" title="sklearn.covariance"><code class="xref py py-mod docutils literal notranslate"><span class="pre">sklearn.covariance</span></code></a>
package, or it can be otherwise obtained by fitting an <a class="reference internal" href="generated/sklearn.covariance.OAS.html#sklearn.covariance.OAS" title="sklearn.covariance.OAS"><code class="xref py py-class docutils literal notranslate"><span class="pre">OAS</span></code></a>
object to the same sample.</p>
<figure class="align-center" id="id13">
<a class="reference external image-reference" href="../auto_examples/covariance/plot_covariance_estimation.html"><img alt="../_images/sphx_glr_plot_covariance_estimation_001.png" src="../_images/sphx_glr_plot_covariance_estimation_001.png" style="width: 416.0px; height: 312.0px;" />
</a>
<figcaption>
<p><span class="caption-text">Bias-variance trade-off when setting the shrinkage: comparing the
choices of Ledoit-Wolf and OAS estimators</span><a class="headerlink" href="#id13" title="Link to this image">#</a></p>
</figcaption>
</figure>
<p class="rubric">References</p>
<aside class="footnote-list brackets">
<aside class="footnote brackets" id="id6" role="doc-footnote">
<span class="label"><span class="fn-bracket">[</span><a role="doc-backlink" href="#id5">2</a><span class="fn-bracket">]</span></span>
<p><a class="reference external" href="https://arxiv.org/abs/0907.4698">“Shrinkage algorithms for MMSE covariance estimation.”,
Chen, Y., Wiesel, A., Eldar, Y. C., &amp; Hero, A. O.
IEEE Transactions on Signal Processing, 58(10), 5016-5029, 2010.</a></p>
</aside>
</aside>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_covariance_estimation.html#sphx-glr-auto-examples-covariance-plot-covariance-estimation-py"><span class="std std-ref">Shrinkage covariance estimation: LedoitWolf vs OAS and max-likelihood</span></a> for
an example on how to fit an <a class="reference internal" href="generated/sklearn.covariance.OAS.html#sklearn.covariance.OAS" title="sklearn.covariance.OAS"><code class="xref py py-class docutils literal notranslate"><span class="pre">OAS</span></code></a> object to data.</p></li>
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_lw_vs_oas.html#sphx-glr-auto-examples-covariance-plot-lw-vs-oas-py"><span class="std std-ref">Ledoit-Wolf vs OAS estimation</span></a> to visualize the
Mean Squared Error difference between a <a class="reference internal" href="generated/sklearn.covariance.LedoitWolf.html#sklearn.covariance.LedoitWolf" title="sklearn.covariance.LedoitWolf"><code class="xref py py-class docutils literal notranslate"><span class="pre">LedoitWolf</span></code></a> and
an <a class="reference internal" href="generated/sklearn.covariance.OAS.html#sklearn.covariance.OAS" title="sklearn.covariance.OAS"><code class="xref py py-class docutils literal notranslate"><span class="pre">OAS</span></code></a> estimator of the covariance.</p></li>
</ul>
<figure class="align-center">
<a class="reference external image-reference" href="../auto_examples/covariance/plot_lw_vs_oas.html"><img alt="../_images/sphx_glr_plot_lw_vs_oas_001.png" src="../_images/sphx_glr_plot_lw_vs_oas_001.png" style="width: 480.0px; height: 360.0px;" />
</a>
</figure>
</section>
</section>
<section id="sparse-inverse-covariance">
<span id="id7"></span><h2><span class="section-number">2.6.3. </span>Sparse inverse covariance<a class="headerlink" href="#sparse-inverse-covariance" title="Link to this heading">#</a></h2>
<p>The matrix inverse of the covariance matrix, often called the precision
matrix, is proportional to the partial correlation matrix. It gives the
partial independence relationship. In other words, if two features are
independent conditionally on the others, the corresponding coefficient in
the precision matrix will be zero. This is why it makes sense to
estimate a sparse precision matrix: the estimation of the covariance
matrix is better conditioned by learning independence relations from
the data. This is known as <em>covariance selection</em>.</p>
<p>In the small-samples situation, in which <code class="docutils literal notranslate"><span class="pre">n_samples</span></code> is on the order
of <code class="docutils literal notranslate"><span class="pre">n_features</span></code> or smaller, sparse inverse covariance estimators tend to work
better than shrunk covariance estimators. However, in the opposite
situation, or for very correlated data, they can be numerically unstable.
In addition, unlike shrinkage estimators, sparse estimators are able to
recover off-diagonal structure.</p>
<p>The <a class="reference internal" href="generated/sklearn.covariance.GraphicalLasso.html#sklearn.covariance.GraphicalLasso" title="sklearn.covariance.GraphicalLasso"><code class="xref py py-class docutils literal notranslate"><span class="pre">GraphicalLasso</span></code></a> estimator uses an l1 penalty to enforce sparsity on
the precision matrix: the higher its <code class="docutils literal notranslate"><span class="pre">alpha</span></code> parameter, the more sparse
the precision matrix. The corresponding <a class="reference internal" href="generated/sklearn.covariance.GraphicalLassoCV.html#sklearn.covariance.GraphicalLassoCV" title="sklearn.covariance.GraphicalLassoCV"><code class="xref py py-class docutils literal notranslate"><span class="pre">GraphicalLassoCV</span></code></a> object uses
cross-validation to automatically set the <code class="docutils literal notranslate"><span class="pre">alpha</span></code> parameter.</p>
<figure class="align-center" id="id14">
<a class="reference external image-reference" href="../auto_examples/covariance/plot_sparse_cov.html"><img alt="../_images/sphx_glr_plot_sparse_cov_001.png" src="../_images/sphx_glr_plot_sparse_cov_001.png" style="width: 600.0px; height: 360.0px;" />
</a>
<figcaption>
<p><span class="caption-text"><em>A comparison of maximum likelihood, shrinkage and sparse estimates of
the covariance and precision matrix in the very small samples
settings.</em></span><a class="headerlink" href="#id14" title="Link to this image">#</a></p>
</figcaption>
</figure>
<div class="admonition note">
<p class="admonition-title">Note</p>
<p><strong>Structure recovery</strong></p>
<p>Recovering a graphical structure from correlations in the data is a
challenging thing. If you are interested in such recovery keep in mind
that:</p>
<ul class="simple">
<li><p>Recovery is easier from a correlation matrix than a covariance
matrix: standardize your observations before running <a class="reference internal" href="generated/sklearn.covariance.GraphicalLasso.html#sklearn.covariance.GraphicalLasso" title="sklearn.covariance.GraphicalLasso"><code class="xref py py-class docutils literal notranslate"><span class="pre">GraphicalLasso</span></code></a></p></li>
<li><p>If the underlying graph has nodes with much more connections than
the average node, the algorithm will miss some of these connections.</p></li>
<li><p>If your number of observations is not large compared to the number
of edges in your underlying graph, you will not recover it.</p></li>
<li><p>Even if you are in favorable recovery conditions, the alpha
parameter chosen by cross-validation (e.g. using the
<a class="reference internal" href="generated/sklearn.covariance.GraphicalLassoCV.html#sklearn.covariance.GraphicalLassoCV" title="sklearn.covariance.GraphicalLassoCV"><code class="xref py py-class docutils literal notranslate"><span class="pre">GraphicalLassoCV</span></code></a> object) will lead to selecting too many edges.
However, the relevant edges will have heavier weights than the
irrelevant ones.</p></li>
</ul>
</div>
<p>The mathematical formulation is the following:</p>
<div class="math notranslate nohighlight">
\[\hat{K} = \mathrm{argmin}_K \big(
            \mathrm{tr} S K - \mathrm{log} \mathrm{det} K
            + \alpha \|K\|_1
            \big)\]</div>
<p>Where <span class="math notranslate nohighlight">\(K\)</span> is the precision matrix to be estimated, and <span class="math notranslate nohighlight">\(S\)</span> is the
sample covariance matrix. <span class="math notranslate nohighlight">\(\|K\|_1\)</span> is the sum of the absolute values of
off-diagonal coefficients of <span class="math notranslate nohighlight">\(K\)</span>. The algorithm employed to solve this
problem is the GLasso algorithm, from the Friedman 2008 Biostatistics
paper. It is the same algorithm as in the R <code class="docutils literal notranslate"><span class="pre">glasso</span></code> package.</p>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p><a class="reference internal" href="../auto_examples/covariance/plot_sparse_cov.html#sphx-glr-auto-examples-covariance-plot-sparse-cov-py"><span class="std std-ref">Sparse inverse covariance estimation</span></a>: example on synthetic
data showing some recovery of a structure, and comparing to other
covariance estimators.</p></li>
<li><p><a class="reference internal" href="../auto_examples/applications/plot_stock_market.html#sphx-glr-auto-examples-applications-plot-stock-market-py"><span class="std std-ref">Visualizing the stock market structure</span></a>: example on real
stock market data, finding which symbols are most linked.</p></li>
</ul>
<p class="rubric">References</p>
<ul class="simple">
<li><p>Friedman et al, <a class="reference external" href="https://biostatistics.oxfordjournals.org/content/9/3/432.short">“Sparse inverse covariance estimation with the
graphical lasso”</a>,
Biostatistics 9, pp 432, 2008</p></li>
</ul>
</section>
<section id="robust-covariance-estimation">
<span id="robust-covariance"></span><h2><span class="section-number">2.6.4. </span>Robust Covariance Estimation<a class="headerlink" href="#robust-covariance-estimation" title="Link to this heading">#</a></h2>
<p>Real data sets are often subject to measurement or recording
errors. Regular but uncommon observations may also appear for a variety
of reasons. Observations which are very uncommon are called
outliers.
The empirical covariance estimator and the shrunk covariance
estimators presented above are very sensitive to the presence of
outliers in the data. Therefore, one should use robust
covariance estimators to estimate the covariance of its real data
sets. Alternatively, robust covariance estimators can be used to
perform outlier detection and discard/downweight some observations
according to further processing of the data.</p>
<p>The <code class="docutils literal notranslate"><span class="pre">sklearn.covariance</span></code> package implements a robust estimator of covariance,
the Minimum Covariance Determinant <a class="footnote-reference brackets" href="#id11" id="id8" role="doc-noteref"><span class="fn-bracket">[</span>3<span class="fn-bracket">]</span></a>.</p>
<section id="minimum-covariance-determinant">
<h3><span class="section-number">2.6.4.1. </span>Minimum Covariance Determinant<a class="headerlink" href="#minimum-covariance-determinant" title="Link to this heading">#</a></h3>
<p>The Minimum Covariance Determinant estimator is a robust estimator of
a data set’s covariance introduced by P.J. Rousseeuw in <a class="footnote-reference brackets" href="#id11" id="id9" role="doc-noteref"><span class="fn-bracket">[</span>3<span class="fn-bracket">]</span></a>.  The idea
is to find a given proportion (h) of “good” observations which are not
outliers and compute their empirical covariance matrix.  This
empirical covariance matrix is then rescaled to compensate the
performed selection of observations (“consistency step”).  Having
computed the Minimum Covariance Determinant estimator, one can give
weights to observations according to their Mahalanobis distance,
leading to a reweighted estimate of the covariance matrix of the data
set (“reweighting step”).</p>
<p>Rousseeuw and Van Driessen <a class="footnote-reference brackets" href="#id12" id="id10" role="doc-noteref"><span class="fn-bracket">[</span>4<span class="fn-bracket">]</span></a> developed the FastMCD algorithm in order
to compute the Minimum Covariance Determinant. This algorithm is used
in scikit-learn when fitting an MCD object to data. The FastMCD
algorithm also computes a robust estimate of the data set location at
the same time.</p>
<p>Raw estimates can be accessed as <code class="docutils literal notranslate"><span class="pre">raw_location_</span></code> and <code class="docutils literal notranslate"><span class="pre">raw_covariance_</span></code>
attributes of a <a class="reference internal" href="generated/sklearn.covariance.MinCovDet.html#sklearn.covariance.MinCovDet" title="sklearn.covariance.MinCovDet"><code class="xref py py-class docutils literal notranslate"><span class="pre">MinCovDet</span></code></a> robust covariance estimator object.</p>
<p class="rubric">References</p>
<aside class="footnote-list brackets">
<aside class="footnote brackets" id="id11" role="doc-footnote">
<span class="label"><span class="fn-bracket">[</span>3<span class="fn-bracket">]</span></span>
<span class="backrefs">(<a role="doc-backlink" href="#id8">1</a>,<a role="doc-backlink" href="#id9">2</a>)</span>
<p>P. J. Rousseeuw. Least median of squares regression.
J. Am Stat Ass, 79:871, 1984.</p>
</aside>
<aside class="footnote brackets" id="id12" role="doc-footnote">
<span class="label"><span class="fn-bracket">[</span><a role="doc-backlink" href="#id10">4</a><span class="fn-bracket">]</span></span>
<p>A Fast Algorithm for the Minimum Covariance Determinant Estimator,
1999, American Statistical Association and the American Society
for Quality, TECHNOMETRICS.</p>
</aside>
</aside>
<p class="rubric">Examples</p>
<ul class="simple">
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_robust_vs_empirical_covariance.html#sphx-glr-auto-examples-covariance-plot-robust-vs-empirical-covariance-py"><span class="std std-ref">Robust vs Empirical covariance estimate</span></a> for
an example on how to fit a <a class="reference internal" href="generated/sklearn.covariance.MinCovDet.html#sklearn.covariance.MinCovDet" title="sklearn.covariance.MinCovDet"><code class="xref py py-class docutils literal notranslate"><span class="pre">MinCovDet</span></code></a> object to data and see how
the estimate remains accurate despite the presence of outliers.</p></li>
<li><p>See <a class="reference internal" href="../auto_examples/covariance/plot_mahalanobis_distances.html#sphx-glr-auto-examples-covariance-plot-mahalanobis-distances-py"><span class="std std-ref">Robust covariance estimation and Mahalanobis distances relevance</span></a> to
visualize the difference between <a class="reference internal" href="generated/sklearn.covariance.EmpiricalCovariance.html#sklearn.covariance.EmpiricalCovariance" title="sklearn.covariance.EmpiricalCovariance"><code class="xref py py-class docutils literal notranslate"><span class="pre">EmpiricalCovariance</span></code></a> and
<a class="reference internal" href="generated/sklearn.covariance.MinCovDet.html#sklearn.covariance.MinCovDet" title="sklearn.covariance.MinCovDet"><code class="xref py py-class docutils literal notranslate"><span class="pre">MinCovDet</span></code></a> covariance estimators in terms of Mahalanobis distance
(so we get a better estimate of the precision matrix too).</p></li>
</ul>
<hr class="docutils" />
<div class="pst-scrollable-table-container"><table class="table">
<thead>
<tr class="row-odd"><th class="head"><p>Influence of outliers on location and covariance estimates</p></th>
<th class="head"><p>Separating inliers from outliers using a Mahalanobis distance</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p><a class="reference external" href="../auto_examples/covariance/plot_robust_vs_empirical_covariance.html"><img alt="robust_vs_emp" src="../_images/sphx_glr_plot_robust_vs_empirical_covariance_001.png" style="width: 313.6px; height: 235.2px;" /></a></p></td>
<td><p><a class="reference external" href="../auto_examples/covariance/plot_mahalanobis_distances.html"><img alt="mahalanobis" src="../_images/sphx_glr_plot_mahalanobis_distances_001.png" style="width: 490.0px; height: 245.0px;" /></a></p></td>
</tr>
</tbody>
</table>
</div>
</section>
</section>
</section>


                </article>
              
              
                <footer class="bd-footer-article">
                  <div class="footer-article-items footer-article__inner">
  
    <div class="footer-article-item">
<div class="prev-next-area">
    <a class="left-prev"
       href="decomposition.html"
       title="previous page">
      <i class="fa-solid fa-angle-left"></i>
      <div class="prev-next-info">
        <p class="prev-next-subtitle">previous</p>
        <p class="prev-next-title"><span class="section-number">2.5. </span>Decomposing signals in components (matrix factorization problems)</p>
      </div>
    </a>
    <a class="right-next"
       href="outlier_detection.html"
       title="next page">
      <div class="prev-next-info">
        <p class="prev-next-subtitle">next</p>
        <p class="prev-next-title"><span class="section-number">2.7. </span>Novelty and Outlier Detection</p>
      </div>
      <i class="fa-solid fa-angle-right"></i>
    </a>
</div></div>
  
</div>
                </footer>
              
              
            </div>
            
            
                <dialog id="pst-secondary-sidebar-modal"></dialog>
                <div id="pst-secondary-sidebar" class="bd-sidebar-secondary bd-toc"><div class="sidebar-secondary-items sidebar-secondary__inner">


  <div class="sidebar-secondary-item">
<div
    id="pst-page-navigation-heading-2"
    class="page-toc tocsection onthispage">
    <i class="fa-solid fa-list"></i> On this page
  </div>
  <nav class="bd-toc-nav page-toc" aria-labelledby="pst-page-navigation-heading-2">
    <ul class="visible nav section-nav flex-column">
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#empirical-covariance">2.6.1. Empirical covariance</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#shrunk-covariance">2.6.2. Shrunk Covariance</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#basic-shrinkage">2.6.2.1. Basic shrinkage</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#ledoit-wolf-shrinkage">2.6.2.2. Ledoit-Wolf shrinkage</a></li>
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#oracle-approximating-shrinkage">2.6.2.3. Oracle Approximating Shrinkage</a></li>
</ul>
</li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#sparse-inverse-covariance">2.6.3. Sparse inverse covariance</a></li>
<li class="toc-h2 nav-item toc-entry"><a class="reference internal nav-link" href="#robust-covariance-estimation">2.6.4. Robust Covariance Estimation</a><ul class="nav section-nav flex-column">
<li class="toc-h3 nav-item toc-entry"><a class="reference internal nav-link" href="#minimum-covariance-determinant">2.6.4.1. Minimum Covariance Determinant</a></li>
</ul>
</li>
</ul>
  </nav></div>

  <div class="sidebar-secondary-item">
  <div role="note" aria-label="source link">
    <h3>This Page</h3>
    <ul class="this-page-menu">
      <li><a href="../_sources/modules/covariance.rst.txt"
            rel="nofollow">Show Source</a></li>
    </ul>
   </div></div>

</div></div>
              
            
          </div>
          <footer class="bd-footer-content">
            
          </footer>
        
      </main>
    </div>
  </div>
  
  <!-- Scripts loaded after <body> so the DOM is not blocked -->
  <script defer src="../_static/scripts/bootstrap.js?digest=8878045cc6db502f8baf"></script>
<script defer src="../_static/scripts/pydata-sphinx-theme.js?digest=8878045cc6db502f8baf"></script>

  <footer class="bd-footer">
<div class="bd-footer__inner bd-page-width">
  
    <div class="footer-items__start">
      
        <div class="footer-item">

  <p class="copyright">
    
      © Copyright 2007 - 2025, scikit-learn developers (BSD License).
      <br/>
    
  </p>
</div>
      
    </div>
  
  
</div>

  </footer>
  </body>
</html>