diff --git a/ci/code_checks.sh b/ci/code_checks.sh
index f839d86318e2e..b03c4f2238445 100755
--- a/ci/code_checks.sh
+++ b/ci/code_checks.sh
@@ -188,9 +188,9 @@ if [[ -z "$CHECK" || "$CHECK" == "patterns" ]]; then
set -o pipefail
if [[ "$AZURE" == "true" ]]; then
# we exclude all c/cpp files as the c/cpp files of pandas code base are tested when Linting .c and .h files
- ! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
+ ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print "##vso[task.logissue type=error;sourcepath=" $1 ";linenumber=" $2 ";] Tailing whitespaces found: " $3}'
else
- ! grep -n '--exclude=*.'{svg,c,cpp,html} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
+ ! grep -n '--exclude=*.'{svg,c,cpp,html,js} --exclude-dir=env -RI "\s$" * | awk -F ":" '{print $1 ":" $2 ":Tailing whitespaces found: " $3}'
fi
RET=$(($RET + $?)) ; echo $MSG "DONE"
fi
diff --git a/environment.yml b/environment.yml
index 89089dcea8c99..6187321cd9242 100644
--- a/environment.yml
+++ b/environment.yml
@@ -36,6 +36,12 @@ dependencies:
- nbsphinx
- pandoc
+ # web (jinja2 is also needed, but it's also an optional pandas dependency)
+ - markdown
+ - feedparser
+ - pyyaml
+ - requests
+
# testing
- boto3
- botocore>=1.11
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 3f4636043dbac..fd8e6378240b4 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -17,6 +17,10 @@ numpydoc>=0.9.0
nbconvert>=5.4.1
nbsphinx
pandoc
+markdown
+feedparser
+pyyaml
+requests
boto3
botocore>=1.11
hypothesis>=3.82
diff --git a/web/README.md b/web/README.md
new file mode 100644
index 0000000000000..7396fbd0833a1
--- /dev/null
+++ b/web/README.md
@@ -0,0 +1,12 @@
+Directory containing the pandas website (hosted at https://pandas.io).
+
+The website sources are in `web/pandas/`, which also includes a `config.yml` file
+containing the settings to build the website. The website is generated with the
+command `./pandas_web.py pandas`. See `./pandas_web.py --help` and the header of
+the script for more information and options.
+
+After building the website, to browse it you need to serve it with an HTTP
+server (rather than opening the local files in the browser, since the links
+and the image sources are absolute paths resolved from where the site is
+served). The easiest way to run an HTTP server locally is to run
+`python -m http.server` from the `web/build/` directory.
diff --git a/web/pandas/_templates/layout.html b/web/pandas/_templates/layout.html
new file mode 100644
index 0000000000000..253318182f30c
--- /dev/null
+++ b/web/pandas/_templates/layout.html
@@ -0,0 +1,85 @@
+
+
+
+
+ pandas - Python Data Analysis Library
+
+
+
+ {% for stylesheet in static.css %}
+
+ {% endfor %}
+
+
+
+
+
+
+
+ {% block body %}{% endblock %}
+
+
+
+
+
+
+
+
+
diff --git a/web/pandas/blog.html b/web/pandas/blog.html
new file mode 100644
index 0000000000000..ffe6f97d679e4
--- /dev/null
+++ b/web/pandas/blog.html
@@ -0,0 +1,14 @@
+{% extends "layout.html" %}
+
+{% block body %}
+ {% for post in blog.posts %}
+
+ {% endfor %}
+{% endblock %}
diff --git a/web/pandas/community/about.md b/web/pandas/community/about.md
new file mode 100644
index 0000000000000..4e50d280d2a10
--- /dev/null
+++ b/web/pandas/community/about.md
@@ -0,0 +1,86 @@
+# About pandas
+
+## History of development
+
+In 2008, _pandas_ development began at [AQR Capital Management](http://www.aqr.com).
+By the end of 2009 it had been [open sourced](http://en.wikipedia.org/wiki/Open_source),
+and is actively supported today by a community of like-minded individuals around the world who
+contribute their valuable time and energy to help make open source _pandas_
+possible. Thank you to [all of our contributors](team.html).
+
+Since 2015, _pandas_ has been a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects).
+This helps ensure the success of the development of _pandas_ as a world-class open-source project.
+
+### Timeline
+
+- **2008**: Development of _pandas_ started
+- **2009**: _pandas_ becomes open source
+- **2012**: First edition of _Python for Data Analysis_ is published
+- **2015**: _pandas_ becomes a [NumFOCUS sponsored project](https://numfocus.org/sponsored-projects)
+- **2018**: First in-person core developer sprint
+
+## Library Highlights
+
+- A fast and efficient **DataFrame** object for data manipulation with
+ integrated indexing;
+
+- Tools for **reading and writing data** between in-memory data structures and
+ different formats: CSV and text files, Microsoft Excel, SQL databases, and
+ the fast HDF5 format;
+
+- Intelligent **data alignment** and integrated handling of **missing data**:
+ gain automatic label-based alignment in computations and easily manipulate
+ messy data into an orderly form;
+
+- Flexible **reshaping** and pivoting of data sets;
+
+- Intelligent label-based **slicing**, **fancy indexing**, and **subsetting**
+ of large data sets;
+
+- Columns can be inserted and deleted from data structures for **size
+ mutability**;
+
+- Aggregating or transforming data with a powerful **group by** engine
+ allowing split-apply-combine operations on data sets;
+
+- High performance **merging and joining** of data sets;
+
+- **Hierarchical axis indexing** provides an intuitive way of working with
+ high-dimensional data in a lower-dimensional data structure;
+
+- **Time series** functionality: date range generation and frequency
+ conversion, moving window statistics, moving window linear regressions, date
+ shifting and lagging. Even create domain-specific time offsets and join time
+ series without losing data;
+
+- Highly **optimized for performance**, with critical code paths written in
+ [Cython](http://www.cython.org/) or C.
+
+- Python with *pandas* is in use in a wide variety of **academic and
+ commercial** domains, including Finance, Neuroscience, Economics,
+ Statistics, Advertising, Web Analytics, and more.
+
+## Mission
+
+_pandas_ aims to be the fundamental high-level building block for doing practical,
+real world data analysis in Python.
+Additionally, it has the broader goal of becoming the most powerful and flexible
+open source data analysis / manipulation tool available in any language.
+
+## Vision
+
+A world where data analytics and manipulation software is:
+
+- Accessible to everyone
+- Free for users to use and modify
+- Flexible
+- Powerful
+- Easy to use
+- Fast
+
+## Values
+
+It is at the core of _pandas_ to be respectful of and welcoming to everybody:
+users, contributors and the broader community, regardless of level of experience,
+gender, gender identity and expression, sexual orientation, disability,
+personal appearance, body size, race, ethnicity, age, religion, or nationality.
diff --git a/web/pandas/community/citing.md b/web/pandas/community/citing.md
new file mode 100644
index 0000000000000..6bad948bb3736
--- /dev/null
+++ b/web/pandas/community/citing.md
@@ -0,0 +1,46 @@
+# Citing pandas
+
+## Citing
+
+If you use _pandas_ for a scientific publication, we would appreciate citations to one of the following papers:
+
+- [Data structures for statistical computing in python](http://conference.scipy.org/proceedings/scipy2010/pdfs/mckinney.pdf),
+ McKinney, Proceedings of the 9th Python in Science Conference, Volume 445, 2010.
+
+ @inproceedings{mckinney2010data,
+ title={Data structures for statistical computing in python},
+ author={Wes McKinney},
+ booktitle={Proceedings of the 9th Python in Science Conference},
+ volume={445},
+ pages={51--56},
+ year={2010},
+ organization={Austin, TX}
+ }
+
+
+- [pandas: a foundational Python library for data analysis and statistics](https://www.scribd.com/document/71048089/pandas-a-Foundational-Python-Library-for-Data-Analysis-and-Statistics),
+ McKinney, Python for High Performance and Scientific Computing, Volume 14, 2011.
+
+ @article{mckinney2011pandas,
+ title={pandas: a foundational Python library for data analysis and statistics},
+ author={Wes McKinney},
+ journal={Python for High Performance and Scientific Computing},
+ volume={14},
+ year={2011}
+ }
+
+## Brand and logo
+
+When using the project name _pandas_, please use it in lower case, even at the beginning of a sentence.
+
+The official logo of _pandas_ is:
+
+
+
+You can download an `svg` version of the logo [here]({{ base_url }}/static/img/pandas.svg).
+
+When using the logo, please follow these guidelines:
+
+- Leave enough margin around the logo
+- Do not distort the logo by changing its proportions
+- Do not place text or other elements on top of the logo
diff --git a/web/pandas/community/coc.md b/web/pandas/community/coc.md
new file mode 100644
index 0000000000000..2841349fdb556
--- /dev/null
+++ b/web/pandas/community/coc.md
@@ -0,0 +1,63 @@
+# Contributor Code of Conduct
+
+As contributors and maintainers of this project, and in the interest of
+fostering an open and welcoming community, we pledge to respect all people who
+contribute through reporting issues, posting feature requests, updating
+documentation, submitting pull requests or patches, and other activities.
+
+We are committed to making participation in this project a harassment-free
+experience for everyone, regardless of level of experience, gender, gender
+identity and expression, sexual orientation, disability, personal appearance,
+body size, race, ethnicity, age, religion, or nationality.
+
+Examples of unacceptable behavior by participants include:
+
+* The use of sexualized language or imagery
+* Personal attacks
+* Trolling or insulting/derogatory comments
+* Public or private harassment
+* Publishing others' private information, such as physical or electronic
+  addresses, without explicit permission
+* Other unethical or unprofessional conduct
+
+Project maintainers have the right and responsibility to remove, edit, or
+reject comments, commits, code, wiki edits, issues, and other contributions
+that are not aligned to this Code of Conduct, or to ban temporarily or
+permanently any contributor for other behaviors that they deem inappropriate,
+threatening, offensive, or harmful.
+
+By adopting this Code of Conduct, project maintainers commit themselves to
+fairly and consistently applying these principles to every aspect of managing
+this project. Project maintainers who do not follow or enforce the Code of
+Conduct may be permanently removed from the project team.
+
+This Code of Conduct applies both within project spaces and in public spaces
+when an individual is representing the project or its community.
+
+A working group of community members is committed to promptly addressing any
+reported issues. The working group is made up of pandas contributors and users.
+Instances of abusive, harassing, or otherwise unacceptable behavior may be
+reported by contacting the working group by e-mail (pandas-coc@googlegroups.com).
+Messages sent to this e-mail address will not be publicly visible; they will
+be seen only by the working group members. The working group currently
+includes:
+
+ {% for person in maintainers.coc %}
+
+{{ person }}
+ {% endfor %}
+
+
+All complaints will be reviewed and investigated and will result in a response
+that is deemed necessary and appropriate to the circumstances. Maintainers are
+obligated to maintain confidentiality with regard to the reporter of an
+incident.
+
+This Code of Conduct is adapted from the [Contributor Covenant][homepage],
+version 1.3.0, available at
+[http://contributor-covenant.org/version/1/3/0/][version],
+and the [Swift Code of Conduct][swift].
+
+[homepage]: http://contributor-covenant.org
+[version]: http://contributor-covenant.org/version/1/3/0/
+[swift]: https://swift.org/community/#code-of-conduct
+
diff --git a/web/pandas/community/ecosystem.md b/web/pandas/community/ecosystem.md
new file mode 100644
index 0000000000000..af27c31b52d50
--- /dev/null
+++ b/web/pandas/community/ecosystem.md
@@ -0,0 +1,370 @@
+# Pandas ecosystem
+
+Increasingly, packages are being built on top of pandas to address
+specific needs in data preparation, analysis and visualization. This is
+encouraging because it means pandas is not only helping users to handle
+their data tasks but also that it provides a better starting point for
+developers to build powerful and more focused data tools. The creation
+of libraries that complement pandas' functionality also allows pandas
+development to remain focused around its original requirements.
+
+This is a non-exhaustive list of projects that build on pandas to
+provide tools in the PyData space. For a list of projects that depend
+on pandas, see the [libraries.io usage page for
+pandas](https://libraries.io/pypi/pandas/usage) or [search pypi for
+pandas](https://pypi.org/search/?q=pandas).
+
+We'd like to make it easier for users to find these projects. If you
+know of other substantial projects that you feel should be on this list,
+please let us know.
+
+## Statistics and machine learning
+
+### [Statsmodels](https://www.statsmodels.org/)
+
+Statsmodels is the prominent Python "statistics and econometrics
+library" and it has a long-standing special relationship with pandas.
+Statsmodels provides powerful statistics, econometrics, analysis and
+modeling functionality that is out of pandas' scope. Statsmodels
+leverages pandas objects as the underlying data container for
+computation.
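+
+As a minimal sketch of this relationship, an ordinary least squares fit on
+pandas objects (the data here is made up):
+
+```
+import pandas as pd
+import statsmodels.api as sm
+
+df = pd.DataFrame({"x": [1, 2, 3, 4], "y": [2.1, 3.9, 6.2, 7.8]})
+
+# statsmodels accepts pandas objects and keeps their labels in the output
+model = sm.OLS(df["y"], sm.add_constant(df["x"])).fit()
+print(model.summary())
+```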
+
+### [sklearn-pandas](https://github.com/paulgb/sklearn-pandas)
+
+Use pandas DataFrames in your [scikit-learn](https://scikit-learn.org/)
+ML pipeline.
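+
+A minimal sketch of the typical usage (the column and data are illustrative):
+
+```
+import pandas as pd
+from sklearn.preprocessing import StandardScaler
+from sklearn_pandas import DataFrameMapper
+
+df = pd.DataFrame({"age": [20, 30, 40]})
+
+# map DataFrame columns to sklearn transformations
+mapper = DataFrameMapper([(["age"], StandardScaler())])
+X = mapper.fit_transform(df)  # NumPy array, ready for an sklearn pipeline
+```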
+
+### [Featuretools](https://github.com/featuretools/featuretools/)
+
+Featuretools is a Python library for automated feature engineering built
+on top of pandas. It excels at transforming temporal and relational
+datasets into feature matrices for machine learning using reusable
+feature engineering "primitives". Users can contribute their own
+primitives in Python and share them with the rest of the community.
+
+## Visualization
+
+### [Altair](https://altair-viz.github.io/)
+
+Altair is a declarative statistical visualization library for Python.
+With Altair, you can spend more time understanding your data and its
+meaning. Altair's API is simple, friendly and consistent and built on
+top of the powerful Vega-Lite JSON specification. This elegant
+simplicity produces beautiful and effective visualizations with a
+minimal amount of code. Altair works with Pandas DataFrames.
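+
+A minimal sketch of charting a DataFrame (the data is illustrative):
+
+```
+import altair as alt
+import pandas as pd
+
+df = pd.DataFrame({"x": [1, 2, 3], "y": [4, 5, 6]})
+
+# declarative: map DataFrame columns to visual encodings
+chart = alt.Chart(df).mark_point().encode(x="x", y="y")
+```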
+
+### [Bokeh](https://bokeh.pydata.org)
+
+Bokeh is a Python interactive visualization library for large datasets
+that natively uses the latest web technologies. Its goal is to provide
+elegant, concise construction of novel graphics in the style of
+Protovis/D3, while delivering high-performance interactivity over large
+data to thin clients.
+
+[Pandas-Bokeh](https://github.com/PatrikHlobil/Pandas-Bokeh) provides a
+high level API for Bokeh that can be loaded as a native Pandas plotting
+backend via
+
+```
+pd.set_option("plotting.backend", "pandas_bokeh")
+```
+
+It is very similar to the matplotlib plotting backend, but provides
+interactive web-based charts and maps.
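+
+For example, once the option is set, the regular pandas plotting API renders
+Bokeh charts (a sketch; requires `pandas-bokeh` to be installed and a pandas
+version that supports the `plotting.backend` option):
+
+```
+import pandas as pd
+
+pd.set_option("plotting.backend", "pandas_bokeh")
+
+df = pd.DataFrame({"x": range(10), "y": range(10)})
+df.plot()  # rendered by Bokeh instead of matplotlib
+```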
+
+### [seaborn](https://seaborn.pydata.org)
+
+Seaborn is a Python visualization library based on
+[matplotlib](https://matplotlib.org). It provides a high-level,
+dataset-oriented interface for creating attractive statistical graphics.
+The plotting functions in seaborn understand pandas objects and leverage
+pandas grouping operations internally to support concise specification
+of complex visualizations. Seaborn also goes beyond matplotlib and
+pandas with the option to perform statistical estimation while plotting,
+aggregating across observations and visualizing the fit of statistical
+models to emphasize patterns in a dataset.
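+
+A small sketch using one of seaborn's bundled example datasets:
+
+```
+import seaborn as sns
+
+tips = sns.load_dataset("tips")  # returns a pandas DataFrame
+sns.scatterplot(x="total_bill", y="tip", hue="day", data=tips)
+```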
+
+### [yhat/ggpy](https://github.com/yhat/ggpy)
+
+Hadley Wickham's [ggplot2](https://ggplot2.tidyverse.org/) is a
+foundational exploratory visualization package for the R language. Based
+on ["The Grammar of
+Graphics"](https://www.cs.uic.edu/~wilkinson/TheGrammarOfGraphics/GOG.html)
+it provides a powerful, declarative and extremely general way to
+generate bespoke plots of any kind of data. It's really quite
+incredible. Various implementations in other languages are available,
+but a faithful implementation for Python users has long been missing.
+Although still young (as of Jan-2014), the
+[yhat/ggpy](https://github.com/yhat/ggpy) project has been progressing
+quickly in that direction.
+
+### [IPython Vega](https://github.com/vega/ipyvega)
+
+[IPython Vega](https://github.com/vega/ipyvega) leverages
+[Vega](https://vega.github.io/vega/) to create plots within Jupyter
+Notebook.
+
+### [Plotly](https://plot.ly/python)
+
+[Plotly's](https://plot.ly/) [Python API](https://plot.ly/python/)
+enables interactive figures and web shareability. Maps, 2D, 3D, and
+live-streaming graphs are rendered with WebGL and
+[D3.js](https://d3js.org/). The library supports plotting directly from
+a pandas DataFrame and cloud-based collaboration. Users of [matplotlib,
+ggplot for Python, and
+Seaborn](https://plot.ly/python/matplotlib-to-plotly-tutorial/) can
+convert figures into interactive web-based plots. Plots can be drawn in
+[IPython Notebooks](https://plot.ly/ipython-notebooks/), edited with R
+or MATLAB, modified in a GUI, or embedded in apps and dashboards. Plotly
+is free for unlimited sharing, and has
+[cloud](https://plot.ly/product/plans/),
+[offline](https://plot.ly/python/offline/), or
+[on-premise](https://plot.ly/product/enterprise/) accounts for private
+use.
+
+### [QtPandas](https://github.com/draperjames/qtpandas)
+
+Spun off from the main pandas library, the
+[qtpandas](https://github.com/draperjames/qtpandas) library enables
+DataFrame visualization and manipulation in PyQt4 and PySide
+applications.
+
+## IDE
+
+### [IPython](https://ipython.org/documentation.html)
+
+IPython is an interactive command shell and distributed computing
+environment. IPython tab completion works with Pandas methods and also
+attributes like DataFrame columns.
+
+### [Jupyter Notebook / Jupyter Lab](https://jupyter.org)
+
+Jupyter Notebook is a web application for creating Jupyter notebooks. A
+Jupyter notebook is a JSON document containing an ordered list of
+input/output cells which can contain code, text, mathematics, plots and
+rich media. Jupyter notebooks can be converted to a number of open
+standard output formats (HTML, HTML presentation slides, LaTeX, PDF,
+ReStructuredText, Markdown, Python) through 'Download As' in the web
+interface and `jupyter nbconvert` in a shell.
+
+Pandas DataFrames implement `_repr_html_` and `_repr_latex_` methods which
+are utilized by Jupyter Notebook for displaying (abbreviated) HTML or
+LaTeX tables. LaTeX output is properly escaped. (Note: HTML tables may
+or may not be compatible with non-HTML Jupyter output formats.)
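+
+As an illustration of this display protocol, any object implementing these
+methods gets rich output in the notebook; a minimal sketch (the class is
+hypothetical):
+
+```
+class RichTable:
+    """Jupyter displays this object using the HTML returned below."""
+
+    def _repr_html_(self):
+        return "<table><tr><td>1</td><td>2</td></tr></table>"
+
+RichTable()  # as the last expression in a cell, renders as an HTML table
+```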
+
+See [Options and Settings](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html)
+and [Available Options](https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html#available-options)
+for pandas `display.` settings.
+
+### [quantopian/qgrid](https://github.com/quantopian/qgrid)
+
+qgrid is \"an interactive grid for sorting and filtering DataFrames in
+IPython Notebook\" built with SlickGrid.
+
+### [Spyder](https://www.spyder-ide.org/)
+
+Spyder is a cross-platform PyQt-based IDE combining the editing,
+analysis, debugging and profiling functionality of a software
+development tool with the data exploration, interactive execution, deep
+inspection and rich visualization capabilities of a scientific
+environment like MATLAB or RStudio.
+
+Its [Variable
+Explorer](https://docs.spyder-ide.org/variableexplorer.html) allows
+users to view, manipulate and edit pandas `Index`, `Series`, and
+`DataFrame` objects like a "spreadsheet", including copying and
+modifying values, sorting, displaying a "heatmap", converting data
+types and more. Pandas objects can also be renamed, duplicated, new
+columns added, copied/pasted to/from the clipboard (as TSV), and
+saved/loaded to/from a file. Spyder can also import data from a variety
+of plain text and binary files or the clipboard into a new pandas
+DataFrame via a sophisticated import wizard.
+
+Most pandas classes, methods and data attributes can be autocompleted in
+Spyder's [Editor](https://docs.spyder-ide.org/editor.html) and [IPython
+Console](https://docs.spyder-ide.org/ipythonconsole.html), and Spyder's
+[Help pane](https://docs.spyder-ide.org/help.html) can retrieve and
+render Numpydoc documentation on pandas objects in rich text with Sphinx
+both automatically and on-demand.
+
+## API
+
+### [pandas-datareader](https://github.com/pydata/pandas-datareader)
+
+`pandas-datareader` is a remote data access library for pandas
+(PyPI:`pandas-datareader`). It is based on functionality that was
+located in `pandas.io.data` and `pandas.io.wb` but was split off in
+v0.19. See more in the [pandas-datareader
+docs](https://pandas-datareader.readthedocs.io/en/latest/).
+
+The following data feeds are available:
+
+- Google Finance
+- Tiingo
+- Morningstar
+- IEX
+- Robinhood
+- Enigma
+- Quandl
+- FRED
+- Fama/French
+- World Bank
+- OECD
+- Eurostat
+- TSP Fund Data
+- Nasdaq Trader Symbol Definitions
+- Stooq Index Data
+- MOEX Data
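+
+A minimal sketch of pulling one of these feeds into a DataFrame (the FRED
+series code is illustrative):
+
+```
+import pandas_datareader.data as web
+
+# download US GDP from FRED as a DataFrame indexed by date
+gdp = web.DataReader("GDP", "fred")
+print(gdp.tail())
+```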
+
+### [quandl/Python](https://github.com/quandl/Python)
+
+Quandl API for Python wraps the Quandl REST API to return Pandas
+DataFrames with timeseries indexes.
+
+### [pydatastream](https://github.com/vfilimonov/pydatastream)
+
+PyDatastream is a Python interface to the [Thomson Dataworks Enterprise
+(DWE/Datastream)](http://dataworks.thomson.com/Dataworks/Enterprise/1.0/)
+SOAP API to return indexed Pandas DataFrames with financial data. This
+package requires valid credentials for this API (not free).
+
+### [pandaSDMX](https://pandasdmx.readthedocs.io)
+
+pandaSDMX is a library to retrieve and acquire statistical data and
+metadata disseminated in [SDMX](https://www.sdmx.org) 2.1, an
+ISO-standard widely used by institutions such as statistics offices,
+central banks, and international organisations. pandaSDMX can expose
+datasets and related structural metadata including data flows,
+code-lists, and data structure definitions as pandas Series or
+MultiIndexed DataFrames.
+
+### [fredapi](https://github.com/mortada/fredapi)
+
+fredapi is a Python interface to the [Federal Reserve Economic Data
+(FRED)](https://fred.stlouisfed.org/) provided by the Federal Reserve
+Bank of St. Louis. It works with both the FRED database and the ALFRED
+database, which contains point-in-time data (i.e. historic data
+revisions). fredapi provides a wrapper in Python to the FRED HTTP API,
+and also provides several convenient methods for parsing and analyzing
+point-in-time data from ALFRED. fredapi makes use of pandas and returns
+data in a Series or DataFrame. This module requires a FRED API key that
+you can obtain for free on the FRED website.
+
+## Domain specific
+
+### [Geopandas](https://github.com/kjordahl/geopandas)
+
+Geopandas extends pandas data objects to include geographic information
+which supports geometric operations. If your work entails maps and
+geographical coordinates, and you love pandas, you should take a close
+look at Geopandas.
+
+### [xarray](https://github.com/pydata/xarray)
+
+xarray brings the labeled data power of pandas to the physical sciences
+by providing N-dimensional variants of the core pandas data structures.
+It aims to provide a pandas-like and pandas-compatible toolkit for
+analytics on multi-dimensional arrays, rather than the tabular data at
+which pandas excels.
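+
+A sketch of the round trip between the two libraries (the data is made up):
+
+```
+import pandas as pd  # xarray must also be installed
+
+df = pd.DataFrame(
+    {"value": [1.0, 2.0]}, index=pd.Index(["a", "b"], name="station")
+)
+ds = df.to_xarray()           # pandas -> xarray Dataset
+df_again = ds.to_dataframe()  # xarray -> pandas
+```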
+
+## Out-of-core
+
+### [Blaze](http://blaze.pydata.org/)
+
+Blaze provides a standard API for doing computations with various
+in-memory and on-disk backends: NumPy, Pandas, SQLAlchemy, MongoDB,
+PyTables, PySpark.
+
+### [Dask](https://dask.readthedocs.io/en/latest/)
+
+Dask is a flexible parallel computing library for analytics. Dask
+provides a familiar `DataFrame` interface for out-of-core, parallel and
+distributed computing.
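+
+A sketch of the familiar interface on out-of-core data (the file pattern and
+column names are illustrative):
+
+```
+import dask.dataframe as dd
+
+# lazily read many CSVs as one partitioned, out-of-core DataFrame
+ddf = dd.read_csv("data/2019-*.csv")
+result = ddf.groupby("user_id").amount.mean().compute()
+```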
+
+### [Dask-ML](https://dask-ml.readthedocs.io/en/latest/)
+
+Dask-ML enables parallel and distributed machine learning using Dask
+alongside existing machine learning libraries like Scikit-Learn,
+XGBoost, and TensorFlow.
+
+### [Koalas](https://koalas.readthedocs.io/en/latest/)
+
+Koalas provides a familiar pandas DataFrame interface on top of Apache
+Spark. It enables users to leverage multi-cores on one machine or a
+cluster of machines to speed up or scale their DataFrame code.
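+
+A sketch of the pandas-like API running on Spark (the file and column names
+are illustrative):
+
+```
+import databricks.koalas as ks
+
+kdf = ks.read_csv("data.csv")         # backed by a Spark DataFrame
+kdf.groupby("user_id").amount.mean()  # pandas-style API, runs on Spark
+```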
+
+### [Odo](http://odo.pydata.org)
+
+Odo provides a uniform API for moving data between different formats. It
+uses pandas' own `read_csv` for CSV IO and leverages many existing
+packages such as PyTables, h5py, and pymongo to move data between
+non-pandas formats. Its graph-based approach is also extensible by end
+users for custom formats that may be too specific for the core of odo.
+
+### [Ray](https://ray.readthedocs.io/en/latest/pandas_on_ray.html)
+
+Pandas on Ray is an early stage DataFrame library that wraps Pandas and
+transparently distributes the data and computation. The user does not
+need to know how many cores their system has, nor do they need to
+specify how to distribute the data. In fact, users can continue using
+their previous Pandas notebooks while experiencing a considerable
+speedup from Pandas on Ray, even on a single machine. Only a
+modification of the import statement is needed, as we demonstrate below.
+Once you've changed your import statement, you're ready to use Pandas on
+Ray just like you would Pandas.
+
+```
+# import pandas as pd
+import ray.dataframe as pd
+```
+
+### [Vaex](https://docs.vaex.io/)
+
+Vaex is a Python library for out-of-core DataFrames (similar to pandas),
+to visualize and explore big tabular datasets. It can calculate
+statistics such as mean, sum, count, standard deviation etc. on an
+N-dimensional grid at up to a billion (10^9) objects/rows per second.
+Visualization is done using histograms, density plots and 3D volume
+rendering, allowing interactive exploration of big data. Vaex uses
+memory mapping, a zero memory copy policy, and lazy computations for
+best performance (no memory wasted). Conversion to and from pandas is
+done with:
+
+- `vaex.from_pandas`
+- `vaex.to_pandas_df`
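+
+A sketch of converting between the two libraries with the functions above
+(assuming `to_pandas_df` is called on the vaex DataFrame):
+
+```
+import pandas as pd
+import vaex
+
+pdf = pd.DataFrame({"a": [1, 2, 3]})
+vdf = vaex.from_pandas(pdf)   # pandas -> vaex
+pdf2 = vdf.to_pandas_df()     # vaex -> pandas
+```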
+
+## Data cleaning and validation
+
+### [pyjanitor](https://github.com/ericmjl/pyjanitor/)
+
+Pyjanitor provides a clean API for cleaning data, using method chaining.
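+
+A sketch of the method-chaining style (importing `janitor` registers the
+extra methods on DataFrame; the column names are illustrative):
+
+```
+import pandas as pd
+import janitor  # noqa: F401 -- registers cleaning methods on DataFrame
+
+df = pd.DataFrame({"A Column": [1, 2], "Another One": [3, 4]})
+df = df.clean_names()  # columns become "a_column" and "another_one"
+```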
+
+### [Engarde](https://engarde.readthedocs.io/en/latest/)
+
+Engarde is a lightweight library used to explicitly state your
+assumptions about your datasets and check that they're *actually* true.
+
+## Extension data types
+
+Pandas provides an interface for defining
+[extension types](https://pandas.pydata.org/pandas-docs/stable/development/extending.html#extension-types)
+to extend NumPy's type system. The following libraries
+implement that interface to provide types not found in NumPy or pandas,
+which work well with pandas' data containers.
+
+### [cyberpandas](https://cyberpandas.readthedocs.io/en/latest)
+
+Cyberpandas provides an extension type for storing arrays of IP
+Addresses. These arrays can be stored inside pandas\' Series and
+DataFrame.
+
+## Accessors
+
+A directory of projects providing
+[extension accessors](https://pandas.pydata.org/pandas-docs/stable/development/extending.html#registering-custom-accessors).
+This is for users to discover new accessors and for library
+authors to coordinate on the namespace.
+
+| Library                                                     | Accessor | Classes               |
+| ----------------------------------------------------------- | -------- | --------------------- |
+| [cyberpandas](https://cyberpandas.readthedocs.io/en/latest) | `ip`     | `Series`              |
+| [pdvega](https://altair-viz.github.io/pdvega/)              | `vgplot` | `Series`, `DataFrame` |
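+
+For library authors, a minimal sketch of registering a custom accessor with
+pandas' public API (the accessor name and method are made up):
+
+```
+import pandas as pd
+
+@pd.api.extensions.register_series_accessor("shout")
+class ShoutAccessor:
+    """Toy accessor; delegates to the underlying Series."""
+
+    def __init__(self, series):
+        self._series = series
+
+    def upper(self):
+        return self._series.str.upper()
+
+pd.Series(["a", "b"]).shout.upper()  # returns Series(["A", "B"])
+```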
diff --git a/web/pandas/community/roadmap.md b/web/pandas/community/roadmap.md
new file mode 100644
index 0000000000000..8a5c2735b3d93
--- /dev/null
+++ b/web/pandas/community/roadmap.md
@@ -0,0 +1,195 @@
+# Roadmap
+
+This page provides an overview of the major themes in pandas'
+development. Each of these items requires a relatively large amount of
+effort to implement. These may be achieved more quickly with dedicated
+funding or interest from contributors.
+
+An item being on the roadmap does not mean that it will *necessarily*
+happen, even with unlimited funding. During the implementation period we
+may discover issues preventing the adoption of the feature.
+
+Additionally, an item *not* being on the roadmap does not exclude it
+from inclusion in pandas. The roadmap is intended for larger,
+fundamental changes to the project that are likely to take months or
+years of developer time. Smaller-scoped items will continue to be
+tracked on our [issue tracker](https://github.com/pandas-dev/pandas/issues).
+
+See [Roadmap evolution](#roadmap-evolution) for proposing
+changes to this document.
+
+## Extensibility
+
+Pandas extension types allow for extending NumPy types with custom data
+types and array storage. Pandas uses extension types internally, and
+provides an interface for 3rd-party libraries to define their own custom
+data types.
+
+Many parts of pandas still unintentionally convert data to a NumPy
+array. These problems are especially pronounced for nested data.
+
+We'd like to improve the handling of extension arrays throughout the
+library, making their behavior more consistent with the handling of
+NumPy arrays. We'll do this by cleaning up pandas' internals and
+adding new methods to the extension array interface.
+
+## String data type
+
+Currently, pandas stores text data in an `object`-dtype NumPy array.
+The current implementation has two primary drawbacks: First,
+`object`-dtype is not specific to strings: any Python object can be
+stored in an `object`-dtype array, not just strings. Second, this is not
+efficient. The NumPy memory model isn't especially well-suited to
+variable-width text data.
+
+To solve the first issue, we propose a new extension type for string
+data. This will initially be opt-in, with users explicitly requesting
+`dtype="string"`. The array backing this string dtype may initially be
+the current implementation: an `object`-dtype NumPy array of Python
+strings.
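+
+A sketch of the proposed opt-in usage (not available at the time of writing;
+shown as the roadmap envisions it):
+
+```
+import pandas as pd
+
+# proposed explicit opt-in to the dedicated string dtype
+s = pd.Series(["a", "b", None], dtype="string")
+```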
+
+To solve the second issue (performance), we'll explore alternative
+in-memory array libraries (for example, Apache Arrow). As part of the
+work, we may need to implement certain operations expected by pandas
+users (for example, the algorithm used in `Series.str.upper`). That work
+may be done outside of pandas.
+
+## Apache Arrow interoperability
+
+[Apache Arrow](https://arrow.apache.org) is a cross-language development
+platform for in-memory data. The Arrow logical types are closely aligned
+with typical pandas use cases.
+
+We'd like to provide better-integrated support for Arrow memory and
+data types within pandas. This will let us take advantage of its I/O
+capabilities and provide for better interoperability with other
+languages and libraries using Arrow.
+
+## Block manager rewrite
+
+We'd like to replace pandas' current internal data structures (a
+collection of 1-D or 2-D arrays) with a simpler collection of 1-D arrays.
+
+Pandas' internal data model is quite complex. A DataFrame is made up of
+one or more 2-dimensional "blocks", with one or more blocks per dtype.
+This collection of 2-D arrays is managed by the BlockManager.
+
+The primary benefit of the BlockManager is improved performance on
+certain operations (construction from a 2D array, binary operations,
+reductions across the columns), especially for wide DataFrames. However,
+the BlockManager substantially increases the complexity and maintenance
+burden of pandas.
+
+By replacing the BlockManager we hope to achieve:
+
+- Substantially simpler code
+- Easier extensibility with new logical types
+- Better user control over memory use and layout
+- Improved micro-performance
+- Option to provide a C / Cython API to pandas' internals
+
+See [these design
+documents](https://dev.pandas.io/pandas2/internal-architecture.html#removal-of-blockmanager-new-dataframe-internals)
+for more.
+
+## Decoupling of indexing and internals
+
+The code for getting and setting values in pandas' data structures
+needs refactoring. In particular, we must clearly separate code that
+converts keys (e.g., the argument to `DataFrame.loc`) to positions from
+code that uses these positions to get or set values. This is related to
+the proposed BlockManager rewrite. Currently, the BlockManager sometimes
+uses label-based, rather than position-based, indexing. We propose that
+it should only work with positional indexing, and the translation of
+keys to positions should be entirely done at a higher level.
+
+Indexing is a complicated API with many subtleties. This refactor will
+require care and attention. More details are discussed at
+
+
+## Numba-accelerated operations
+
+[Numba](https://numba.pydata.org) is a JIT compiler for Python code.
+We'd like to provide ways for users to apply their own Numba-jitted
+functions where pandas accepts user-defined functions (for example,
+`Series.apply`,
+`DataFrame.apply`,
+`DataFrame.applymap`, and in groupby and
+window contexts). This will improve the performance of
+user-defined-functions in these operations by staying within compiled
+code.
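+
+As an illustration, a jitted function can already be passed to these APIs,
+but today it is called element by element from Python; the roadmap item is
+about keeping such loops in compiled code (a sketch):
+
+```
+import numba
+import pandas as pd
+
+@numba.njit
+def double(x):
+    return x * 2
+
+s = pd.Series(range(1000))
+s.apply(double)  # the goal: keep loops like this entirely in compiled code
+```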
+
+## Documentation improvements
+
+We'd like to improve the content, structure, and presentation of the
+pandas documentation. Some specific goals include
+
+- Overhaul the HTML theme with a modern, responsive design
+  ([GH 15556](https://github.com/pandas-dev/pandas/issues/15556))
+- Improve the "Getting Started" documentation, designing and writing
+ learning paths for users different backgrounds (e.g. brand new to
+ programming, familiar with other languages like R, already familiar
+ with Python).
+- Improve the overall organization of the documentation and specific
+ subsections of the documentation to make navigation and finding
+ content easier.
+
+## Package docstring validation
+
+To improve the quality and consistency of pandas docstrings, we've
+developed tooling to check docstrings in a variety of ways. The script
+`scripts/validate_docstrings.py` in the pandas repository contains the
+checks.
+
+Like many other projects, pandas uses the
+[numpydoc](https://numpydoc.readthedocs.io/en/latest/) style for writing
+docstrings. With the collaboration of the numpydoc maintainers, we'd
+like to move the checks to a package other than pandas so that other
+projects can easily use them as well.
+
+## Performance monitoring
+
+Pandas uses [airspeed velocity](https://asv.readthedocs.io/en/stable/)
+to monitor for performance regressions. ASV itself is a fabulous tool,
+but requires some additional work to be integrated into an open source
+project's workflow.
+
+The [asv-runner](https://github.com/asv-runner) organization, currently
+made up of pandas maintainers, provides tools built on top of ASV. We
+have a physical machine for running a number of projects' benchmarks,
+and tools for managing the benchmark runs and reporting on results.
+
+We'd like to fund improvements and maintenance of these tools to:
+
+- Be more stable. Currently, they're maintained on the nights and
+ weekends when a maintainer has free time.
+- Tune the system for benchmarks to improve stability.
+
+- Build a GitHub bot to request ASV runs *before* a PR is merged.
+ Currently, the benchmarks are only run nightly.
+
+## Roadmap Evolution
+
+Pandas continues to evolve. The direction is primarily determined by
+community interest. Everyone is welcome to review existing items on the
+roadmap and to propose a new item.
+
+Each item on the roadmap should be a short summary of a larger design
+proposal. The proposal should include:
+
+1. Short summary of the changes, which would be appropriate for
+ inclusion in the roadmap if accepted.
+2. Motivation for the changes.
+3. An explanation of why the change is in scope for pandas.
+4. Detailed design: Preferably with example usage (even if not
+   implemented yet) and API documentation.
+5. API Change: Any API changes that may result from the proposal.
+
+That proposal may then be submitted as a GitHub issue, where the pandas
+maintainers can review and comment on the design. The [pandas mailing
+list](https://mail.python.org/mailman/listinfo/pandas-dev) should be
+notified of the proposal.
+
+When there's agreement that an implementation would be welcome, the
+roadmap should be updated to include the summary and a link to the
+discussion issue.
diff --git a/web/pandas/community/team.md b/web/pandas/community/team.md
new file mode 100644
index 0000000000000..c0a15081e1fa8
--- /dev/null
+++ b/web/pandas/community/team.md
@@ -0,0 +1,101 @@
+# Team
+
+## Contributors
+
+_pandas_ is made with love by more than [1,500 volunteer contributors](https://github.com/pandas-dev/pandas/graphs/contributors).
+
+If you want to support pandas development, you can find information on the [donations page](../donate.html).
+
+## Maintainers
+
+
+ {% for row in maintainers.people | batch(6, "") %}
+
+
+## BDFL
+
+Wes McKinney is the Benevolent Dictator for Life (BDFL).
+
+## Governance
+
+The project governance is available in the [project governance documents](https://github.com/pandas-dev/pandas-governance).
+
+## NumFOCUS
+
+
+
+_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States.
+NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the
+health and sustainability of the project. Visit numfocus.org for more information.
+
+Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible
+to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation.
+
+## Code of conduct committee
+
+
+ {% for person in maintainers.coc %}
+
+{{ person }}
+ {% endfor %}
+
+
+## NumFOCUS committee
+
+
+ {% for person in maintainers.numfocus %}
+
{{ person }}
+ {% endfor %}
+
+
+## Institutional partners
+
+
+ {% for company in partners.active if company.employs %}
+
+
+## In-kind sponsors
+
+- [Indeed](https://opensource.indeedeng.io/): Logo and website design
+- Can we find a donor for the hosting (website, benchmarks,...?)
+
+## Emeritus maintainers
+
+
diff --git a/web/pandas/config.yml b/web/pandas/config.yml
new file mode 100644
index 0000000000000..ba979e220f3bd
--- /dev/null
+++ b/web/pandas/config.yml
@@ -0,0 +1,129 @@
+main:
+ templates_path: _templates
+ base_template: "layout.html"
+ ignore:
+ - _templates/layout.html
+ - config.yml
+ - blog.html # blog will be added at a later stage
+ - try.md # the binder page will be added later
+ github_repo_url: pandas-dev/pandas
+ context_preprocessors:
+ - pandas_web.Preprocessors.navbar_add_info
+ # - pandas_web.Preprocessors.blog_add_posts
+ - pandas_web.Preprocessors.maintainers_add_info
+ - pandas_web.Preprocessors.home_add_releases
+ markdown_extensions:
+ - toc
+ - tables
+ - fenced_code
+static:
+ logo: # path to the logo when it's in the repo
+ css:
+ - /static/css/pandas.css
+navbar:
+ - name: "Install"
+ target: /install.html
+ - name: "Documentation"
+ target:
+ - name: "Getting started"
+ target: https://pandas.pydata.org/pandas-docs/stable/getting_started/index.html
+ - name: "User guide"
+ target: https://pandas.pydata.org/pandas-docs/stable/user_guide/index.html
+ - name: "API reference"
+ target: https://pandas.pydata.org/pandas-docs/stable/reference/index.html
+ - name: "Contributing to pandas"
+ target: https://pandas.pydata.org/pandas-docs/stable/development/index.html
+ - name: "Release notes"
+ target: https://pandas.pydata.org/pandas-docs/stable/whatsnew/index.html
+ - name: "Community"
+ target:
+ - name: "About pandas"
+ target: /community/about.html
+ - name: "Project roadmap"
+ target: /community/roadmap.html
+ - name: "Ecosystem"
+ target: /community/ecosystem.html
+ - name: "Ask a question (StackOverflow)"
+ target: https://stackoverflow.com/questions/tagged/pandas
+ - name: "Discuss (mailing list)"
+ target: https://groups.google.com/forum/#!forum/pydata
+ - name: "Team"
+ target: /community/team.html
+ - name: "Code of Conduct"
+ target: /community/coc.html
+ - name: "Citing pandas"
+ target: /community/citing.html
+ # - name: "Blog"
+ # target: /blog.html
+ - name: "Donate"
+ target: /donate.html
+blog:
+ num_posts: 8
+ feed:
+ - https://wesmckinney.com/feeds/pandas.atom.xml
+ - https://tomaugspurger.github.io/feed
+ - https://jorisvandenbossche.github.io/feeds/all.atom.xml
+ - https://datapythonista.github.io/blog/feeds/pandas.atom.xml
+ - https://numfocus.org/tag/pandas/feed/
+maintainers:
+ active:
+ - wesm
+ - jorisvandenbossche
+ - TomAugspurger
+ - shoyer
+ - jreback
+ - chris-b1
+ - sinhrks
+ - cpcloud
+ - gfyoung
+ - toobaz
+ - WillAyd
+ - mroeschke
+ - jschendel
+ - jbrockmendel
+ - datapythonista
+ - simonjayhawkins
+ - topper-123
+ emeritus:
+ - Wouter Overmeire
+ - Skipper Seabold
+ - Jeff Tratner
+ coc:
+ - Safia Abdalla
+ - Tom Augspurger
+ - Joris Van den Bossche
+ - Camille Scott
+ - Nathaniel Smith
+ numfocus:
+ - Phillip Cloud
+ - Stephan Hoyer
+ - Wes McKinney
+ - Jeff Reback
+ - Joris Van den Bossche
+partners:
+ active:
+ - name: "NumFOCUS"
+ url: https://numfocus.org/
+ logo: /static/img/partners/numfocus.svg
+ - name: "Anaconda"
+ url: https://www.anaconda.com/
+ logo: /static/img/partners/anaconda.svg
+ employs: "Tom Augspurger, Brock Mendel"
+ - name: "Two Sigma"
+ url: https://www.twosigma.com/
+ logo: /static/img/partners/two_sigma.svg
+ employs: "Phillip Cloud, Jeff Reback"
+ - name: "RStudio"
+ url: https://www.rstudio.com/
+ logo: /static/img/partners/r_studio.svg
+ employs: "Wes McKinney"
+ - name: "Ursa Labs"
+ url: https://ursalabs.org/
+ logo: /static/img/partners/ursa_labs.svg
+ employs: "Wes McKinney, Joris Van den Bossche"
+ - name: "Tidelift"
+ url: https://tidelift.com
+ logo: /static/img/partners/tidelift.svg
+ past:
+ - name: "Paris-Saclay Center for Data Science"
+ url: https://www.datascience-paris-saclay.fr/
diff --git a/web/pandas/donate.md b/web/pandas/donate.md
new file mode 100644
index 0000000000000..5badb4c5a2031
--- /dev/null
+++ b/web/pandas/donate.md
@@ -0,0 +1,25 @@
+# Donate to pandas
+
+_pandas_ is and always will be **free**. To make the development sustainable, we need _pandas_ users, corporate
+or individual, to support the development by providing their time and money.
+
+You can find more information about current developers and supporters in the [team page](community/team.html).
+Financial contributions will mainly be used to advance the [pandas roadmap](community/roadmap.html).
+
+- If your **company or organization** is interested in helping make pandas better, please contact us at [info@numfocus.org](mailto:info@numfocus.org)
+- If you want to contribute to _pandas_ with your **time**, please visit the [contributing page](https://pandas.pydata.org/pandas-docs/stable/development/index.html)
+- If you want to support _pandas_ with a **donation**, please use the form below:
+
+
+
+
+
+
+_pandas_ is a Sponsored Project of [NumFOCUS](https://numfocus.org/), a 501(c)(3) nonprofit charity in the United States.
+NumFOCUS provides _pandas_ with fiscal, legal, and administrative support to help ensure the
+health and sustainability of the project. Visit numfocus.org for more information.
+
+Donations to _pandas_ are managed by NumFOCUS. For donors in the United States, your gift is tax-deductible
+to the extent provided by law. As with any donation, you should consult with your tax adviser about your particular tax situation.
diff --git a/web/pandas/index.html b/web/pandas/index.html
new file mode 100644
index 0000000000000..696f0862aa109
--- /dev/null
+++ b/web/pandas/index.html
@@ -0,0 +1,114 @@
+{% extends "layout.html" %}
+{% block body %}
+
+
+
+
+
+pandas
+
+ pandas is a fast, powerful, flexible and easy to use open source data analysis and manipulation tool,
+ built on top of the Python programming language.
+
+
+{% endblock %}
diff --git a/web/pandas/install.md b/web/pandas/install.md
new file mode 100644
index 0000000000000..c6cccd803e33e
--- /dev/null
+++ b/web/pandas/install.md
@@ -0,0 +1,28 @@
+# Installation instructions
+
+The following steps provide the easiest and recommended way to set up your
+environment to use pandas. Other installation options can be found in
+the [advanced installation page](https://pandas.pydata.org/pandas-docs/stable/install.html).
+
+1. Download [Anaconda](https://www.anaconda.com/distribution/) for your operating system and
+   the latest Python version, run the installer, and follow the steps. Detailed instructions
+   on how to install Anaconda can be found in the
+   [Anaconda documentation](https://docs.anaconda.com/anaconda/install/).
+
+2. In the Anaconda prompt (or terminal on Linux or macOS), start JupyterLab:
+
+
+
+3. In JupyterLab, create a new (Python 3) notebook:
+
+
+
+4. In the first cell of the notebook, you can import pandas and check the version with:
+
+
+
+5. Now you are ready to use pandas, and you can write your code in the next cells.
+
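+As a text version of steps 2 and 4 above: start JupyterLab by typing
+`jupyter lab` in the prompt, and run a minimal check like the following in
+the first notebook cell:
+
+```
+import pandas as pd
+
+print(pd.__version__)  # prints the installed pandas version
+```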
+
+You can learn more about pandas in the [tutorials](#), and more about JupyterLab
+in the [JupyterLab documentation](https://jupyterlab.readthedocs.io/en/stable/user/interface.html).
diff --git a/web/pandas/static/css/pandas.css b/web/pandas/static/css/pandas.css
new file mode 100644
index 0000000000000..5911de96b5fa9
--- /dev/null
+++ b/web/pandas/static/css/pandas.css
@@ -0,0 +1,16 @@
+body {
+ padding-top: 5em;
+ padding-bottom: 3em;
+}
+code {
+ white-space: pre;
+}
+a.navbar-brand img {
+ max-height: 2em;
+}
+div.card {
+ margin: 0 0 .2em .2em !important;
+}
+.book {
+ padding: 0 20%;
+}
diff --git a/web/pandas/static/img/install/anaconda_prompt.png b/web/pandas/static/img/install/anaconda_prompt.png
new file mode 100644
index 0000000000000..7b547e4ebb02a
Binary files /dev/null and b/web/pandas/static/img/install/anaconda_prompt.png differ
diff --git a/web/pandas/static/img/install/jupyterlab_home.png b/web/pandas/static/img/install/jupyterlab_home.png
new file mode 100644
index 0000000000000..c62d33a5e0fc6
Binary files /dev/null and b/web/pandas/static/img/install/jupyterlab_home.png differ
diff --git a/web/pandas/static/img/install/pandas_import_and_version.png b/web/pandas/static/img/install/pandas_import_and_version.png
new file mode 100644
index 0000000000000..64c1303ac495c
Binary files /dev/null and b/web/pandas/static/img/install/pandas_import_and_version.png differ
diff --git a/web/pandas/static/img/pandas.svg b/web/pandas/static/img/pandas.svg
new file mode 120000
index 0000000000000..2e5d3872e4845
--- /dev/null
+++ b/web/pandas/static/img/pandas.svg
@@ -0,0 +1 @@
+../../../../doc/logo/pandas_logo.svg
\ No newline at end of file
diff --git a/web/pandas/static/img/partners/anaconda.svg b/web/pandas/static/img/partners/anaconda.svg
new file mode 100644
index 0000000000000..fcddf72ebaa28
--- /dev/null
+++ b/web/pandas/static/img/partners/anaconda.svg
@@ -0,0 +1,99 @@
+
+
\ No newline at end of file
diff --git a/web/pandas/static/img/partners/numfocus.svg b/web/pandas/static/img/partners/numfocus.svg
new file mode 100644
index 0000000000000..fcdd87b41e475
--- /dev/null
+++ b/web/pandas/static/img/partners/numfocus.svg
@@ -0,0 +1,60 @@
+
+
+
\ No newline at end of file
diff --git a/web/pandas/static/img/partners/r_studio.svg b/web/pandas/static/img/partners/r_studio.svg
new file mode 100644
index 0000000000000..15a1d2a30ff30
--- /dev/null
+++ b/web/pandas/static/img/partners/r_studio.svg
@@ -0,0 +1,50 @@
+
+
+
\ No newline at end of file
diff --git a/web/pandas/static/img/partners/tidelift.svg b/web/pandas/static/img/partners/tidelift.svg
new file mode 100644
index 0000000000000..af12d68417235
--- /dev/null
+++ b/web/pandas/static/img/partners/tidelift.svg
@@ -0,0 +1,33 @@
+
+
+
diff --git a/web/pandas/static/img/partners/two_sigma.svg b/web/pandas/static/img/partners/two_sigma.svg
new file mode 100644
index 0000000000000..d38df12766ed6
--- /dev/null
+++ b/web/pandas/static/img/partners/two_sigma.svg
@@ -0,0 +1 @@
+
diff --git a/web/pandas/static/img/partners/ursa_labs.svg b/web/pandas/static/img/partners/ursa_labs.svg
new file mode 100644
index 0000000000000..cacc80e337d25
--- /dev/null
+++ b/web/pandas/static/img/partners/ursa_labs.svg
@@ -0,0 +1,106 @@
+
+
+
diff --git a/web/pandas/static/img/pydata_book.gif b/web/pandas/static/img/pydata_book.gif
new file mode 100644
index 0000000000000..db05c209704a2
Binary files /dev/null and b/web/pandas/static/img/pydata_book.gif differ
diff --git a/web/pandas/try.md b/web/pandas/try.md
new file mode 100644
index 0000000000000..20e119759df6f
--- /dev/null
+++ b/web/pandas/try.md
@@ -0,0 +1,21 @@
+# Try pandas online
+
+
+
+
+
+
+
+## Interactive tutorials
+
+You can also try _pandas_ on [Binder](https://mybinder.org/) for one of the following topics:
+
+- Exploratory analysis of US presidents
+- Preprocessing the Titanic dataset to train a machine learning model
+- Forecasting the stock market
+
+_(links will be added soon)_
diff --git a/web/pandas_web.py b/web/pandas_web.py
new file mode 100644
index 0000000000000..d515d8a0e1cd7
--- /dev/null
+++ b/web/pandas_web.py
@@ -0,0 +1,286 @@
+#!/usr/bin/env python
+"""
+Simple static site generator for the pandas website.
+
+pandas_web.py takes a directory as a parameter, and copies all the files into the
+target directory after converting markdown files into html and rendering both
+markdown and html files with a context. The context is obtained by parsing
+the file ``config.yml`` in the root of the source directory.
+
+The file should contain:
+```
+main:
+    templates_path:
+ base_template:
+ ignore:
+ -
+ github_repo_url:
+ context_preprocessors:
+ -
+ markdown_extensions:
+ -
+```
+
+The rest of the items in the file will be added directly to the context.
+"""
+import argparse
+import datetime
+import importlib
+import operator
+import os
+import shutil
+import sys
+import time
+import typing
+
+import feedparser
+import markdown
+import jinja2
+import requests
+import yaml
+
+
+class Preprocessors:
+ """
+ Built-in context preprocessors.
+
+ Context preprocessors are functions that receive the context used to
+    render the templates, and enrich it with additional information.
+
+ The original context is obtained by parsing ``config.yml``, and
+    anything else needed can just be added with context preprocessors.
+ """
+
+ @staticmethod
+ def navbar_add_info(context):
+ """
+ Items in the main navigation bar can be direct links, or dropdowns with
+ subitems. This context preprocessor adds a boolean field
+        ``has_subitems`` that tells which of the two each element is. It
+ also adds a ``slug`` field to be used as a CSS id.
+ """
+ for i, item in enumerate(context["navbar"]):
+ context["navbar"][i] = dict(
+ item,
+ has_subitems=isinstance(item["target"], list),
+ slug=(item["name"].replace(" ", "-").lower()),
+ )
+ return context
+
+ @staticmethod
+ def blog_add_posts(context):
+ """
+ Given the blog feed defined in the configuration yaml, this context
+ preprocessor fetches the posts in the feeds, and returns the relevant
+ information for them (sorted from newest to oldest).
+ """
+ posts = []
+ for feed_url in context["blog"]["feed"]:
+ feed_data = feedparser.parse(feed_url)
+ for entry in feed_data.entries:
+ published = datetime.datetime.fromtimestamp(
+ time.mktime(entry.published_parsed)
+ )
+ posts.append(
+ {
+ "title": entry.title,
+ "author": entry.author,
+ "published": published,
+ "feed": feed_data["feed"]["title"],
+ "link": entry.link,
+ "description": entry.description,
+ "summary": entry.summary,
+ }
+ )
+ posts.sort(key=operator.itemgetter("published"), reverse=True)
+ context["blog"]["posts"] = posts[: context["blog"]["num_posts"]]
+ return context
+
+ @staticmethod
+ def maintainers_add_info(context):
+ """
+ Given the active maintainers defined in the yaml file, it fetches
+ the GitHub user information for them.
+ """
+ context["maintainers"]["people"] = []
+ for user in context["maintainers"]["active"]:
+ resp = requests.get(f"https://api.github.com/users/{user}")
+ if context["ignore_io_errors"] and resp.status_code == 403:
+ return context
+ resp.raise_for_status()
+ context["maintainers"]["people"].append(resp.json())
+ return context
+
+ @staticmethod
+ def home_add_releases(context):
+ context["releases"] = []
+
+ github_repo_url = context["main"]["github_repo_url"]
+ resp = requests.get(f"https://api.github.com/repos/{github_repo_url}/releases")
+ if context["ignore_io_errors"] and resp.status_code == 403:
+ return context
+ resp.raise_for_status()
+
+ for release in resp.json():
+ if release["prerelease"]:
+ continue
+ published = datetime.datetime.strptime(
+ release["published_at"], "%Y-%m-%dT%H:%M:%SZ"
+ )
+ context["releases"].append(
+ {
+ "name": release["tag_name"].lstrip("v"),
+ "tag": release["tag_name"],
+ "published": published,
+ "url": (
+ release["assets"][0]["browser_download_url"]
+ if release["assets"]
+ else ""
+ ),
+ }
+ )
+ return context
+
+
+def get_callable(obj_as_str: str) -> object:
+ """
+ Get a Python object from its string representation.
+
+    For example, for ``sys.stdout.write`` it would import the module
+    ``sys`` and return the ``write`` method of ``sys.stdout``.
+ """
+ components = obj_as_str.split(".")
+    attrs = []
+    obj = None  # set once one of the import attempts below succeeds
+    while components:
+ try:
+ obj = importlib.import_module(".".join(components))
+ except ImportError:
+ attrs.insert(0, components.pop())
+ else:
+ break
+
+ if not obj:
+ raise ImportError(f'Could not import "{obj_as_str}"')
+
+ for attr in attrs:
+ obj = getattr(obj, attr)
+
+ return obj
+
+
+def get_context(config_fname: str, ignore_io_errors: bool, **kwargs):
+ """
+ Load the config yaml as the base context, and enrich it with the
+ information added by the context preprocessors defined in the file.
+ """
+ with open(config_fname) as f:
+ context = yaml.safe_load(f)
+
+ context["ignore_io_errors"] = ignore_io_errors
+ context.update(kwargs)
+
+ preprocessors = (
+ get_callable(context_prep)
+ for context_prep in context["main"]["context_preprocessors"]
+ )
+ for preprocessor in preprocessors:
+ context = preprocessor(context)
+ msg = f"{preprocessor.__name__} is missing the return statement"
+ assert context is not None, msg
+
+ return context
+
+
+def get_source_files(source_path: str) -> typing.Generator[str, None, None]:
+ """
+ Generate the list of files present in the source directory.
+ """
+ for root, dirs, fnames in os.walk(source_path):
+ root = os.path.relpath(root, source_path)
+ for fname in fnames:
+ yield os.path.join(root, fname)
+
+
+def extend_base_template(content: str, base_template: str) -> str:
+ """
+ Wrap document to extend the base template, before it is rendered with
+ Jinja2.
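+
+    For example, wrapping ``<h1>Hi</h1>`` with base ``layout.html`` returns::
+
+        {% extends "layout.html" %}{% block body %}<h1>Hi</h1>{% endblock %}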
+ """
+ result = '{% extends "' + base_template + '" %}'
+ result += "{% block body %}"
+ result += content
+ result += "{% endblock %}"
+ return result
+
+
+def main(
+ source_path: str, target_path: str, base_url: str, ignore_io_errors: bool
+) -> int:
+ """
+ Copy every file in the source directory to the target directory.
+
+ For ``.md`` and ``.html`` files, render them with the context
+    before copying them. ``.md`` files are transformed to HTML.
+ """
+ config_fname = os.path.join(source_path, "config.yml")
+
+ shutil.rmtree(target_path, ignore_errors=True)
+ os.makedirs(target_path, exist_ok=True)
+
+ sys.stderr.write("Generating context...\n")
+ context = get_context(config_fname, ignore_io_errors, base_url=base_url)
+ sys.stderr.write("Context generated\n")
+
+ templates_path = os.path.join(source_path, context["main"]["templates_path"])
+ jinja_env = jinja2.Environment(loader=jinja2.FileSystemLoader(templates_path))
+
+ for fname in get_source_files(source_path):
+ if os.path.normpath(fname) in context["main"]["ignore"]:
+ continue
+
+ sys.stderr.write(f"Processing {fname}\n")
+ dirname = os.path.dirname(fname)
+ os.makedirs(os.path.join(target_path, dirname), exist_ok=True)
+
+ extension = os.path.splitext(fname)[-1]
+ if extension in (".html", ".md"):
+ with open(os.path.join(source_path, fname)) as f:
+ content = f.read()
+ if extension == ".md":
+ body = markdown.markdown(
+ content, extensions=context["main"]["markdown_extensions"]
+ )
+ content = extend_base_template(body, context["main"]["base_template"])
+ content = jinja_env.from_string(content).render(**context)
+ fname = os.path.splitext(fname)[0] + ".html"
+ with open(os.path.join(target_path, fname), "w") as f:
+ f.write(content)
+ else:
+            shutil.copy(
+                os.path.join(source_path, fname), os.path.join(target_path, dirname)
+            )
+
+    return 0
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(description="Documentation builder.")
+ parser.add_argument(
+ "source_path", help="path to the source directory (must contain config.yml)"
+ )
+ parser.add_argument(
+ "--target-path", default="build", help="directory where to write the output"
+ )
+ parser.add_argument(
+ "--base-url", default="", help="base url where the website is served from"
+ )
+ parser.add_argument(
+ "--ignore-io-errors",
+ action="store_true",
+ help="do not fail if errors happen when fetching "
+ "data from http sources, and those fail "
+ "(mostly useful to allow github quota errors "
+ "when running the script locally)",
+ )
+ args = parser.parse_args()
+ sys.exit(
+ main(args.source_path, args.target_path, args.base_url, args.ignore_io_errors)
+ )