{ "cells": [ { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "source": [ "# Expanding the measurement of culture with a sample of two billion humans" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "## Replication Data and Code" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "This notebook provides all the steps to replicate the results of our paper [Expanding the measurement of culture with a sample of two billion humans](https://doi.org/10.1098/rsif.2022.0085) published in the *Journal of the Royal Society Interface 19:20220085* (2022)." ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Setup and Pre-requisites" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "-" } }, "source": [ "Let's start by importing the required packages" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "slideshow": { "slide_type": "subslide" } }, "outputs": [], "source": [ "#%pylab --no-import-all\n", "%matplotlib inline\n", "\n", "import sys, os, time\n", "import numpy as np\n", "import pandas as pd\n", "pd.set_option('display.width', 160)\n", "import matplotlib.pyplot as plt\n", "import matplotlib as mpl\n", "import matplotlib.patches as mpatches\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "from statsmodels.iolib.summary2 import summary_col\n", "from sklearn.metrics.pairwise import cosine_similarity, cosine_distances, manhattan_distances, pairwise_distances\n", "from scipy.stats import zscore\n", "from scipy.cluster.hierarchy import dendrogram, linkage\n", "from scipy import spatial, stats\n", "from scipy.stats import zscore\n", "import MantelTest.MantelTest as MantelTest\n", "import re\n", "import seaborn as sns" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Let's setup our paths" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "pathfb = './data/'\n", "pathfbor = './data/OriginalData/'\n", "pathout = pathfb + 'Regs/'\n", "if os.path.exists(pathout) == False:\n", " os.mkdir(pathout)\n", "pathshare = pathout\n", "if os.path.exists(pathfbor) == False:\n", " os.mkdir(pathfbor)" ] }, { "cell_type": "markdown", "metadata": { "slideshow": { "slide_type": "subslide" } }, "source": [ "Let's load the pairwise distance data" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "mypairs = pd.read_stata(pathout + 'AllDists.dta')\n", "mypairs.drop([x for x in mypairs.columns if x.endswith('uk') or x.endswith('usa')], inplace=True, axis=1)\n", "mypairs.drop([x for x in mypairs.columns if x.find('cognate')!=-1], inplace=True, axis=1)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | ISO_CODE_1 | \n", "ISO_CODE_2 | \n", "CosDist1 | \n", "CosDist2 | \n", "CosDist3 | \n", "CosDist4 | \n", "CosDist5 | \n", "CosDist6 | \n", "CosDist7 | \n", "CosDist8 | \n", "... | \n", "total_non_binary | \n", "CosDistAll | \n", "CosDistBin | \n", "CosDistOptions | \n", "CosDistScale | \n", "FBDist | \n", "dist | \n", "distcap | \n", "distw | \n", "distwces | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "AD | \n", "AE | \n", "0.649726 | \n", "1.000000 | \n", "1.000000 | \n", "1.0 | \n", "1.000000 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.233552 | \n", "5209.694824 | \n", "5209.694824 | \n", "5239.464994 | \n", "5239.175640 | \n", "
1 | \n", "AD | \n", "AF | \n", "0.027777 | \n", "1.000000 | \n", "1.000000 | \n", "1.0 | \n", "1.000000 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.279233 | \n", "5806.358887 | \n", "5806.358887 | \n", "5712.403090 | \n", "5707.325970 | \n", "
2 | \n", "AD | \n", "AG | \n", "0.298230 | \n", "1.000000 | \n", "1.000000 | \n", "1.0 | \n", "1.000000 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.227161 | \n", "6565.212402 | \n", "6565.212402 | \n", "6574.278222 | \n", "6574.205836 | \n", "
3 | \n", "AD | \n", "AI | \n", "0.917672 | \n", "1.000000 | \n", "1.000000 | \n", "1.0 | \n", "1.000000 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.262021 | \n", "6589.531250 | \n", "6589.531250 | \n", "6593.265340 | \n", "6593.264953 | \n", "
4 | \n", "AD | \n", "AL | \n", "0.002674 | \n", "0.998614 | \n", "0.998062 | \n", "1.0 | \n", "0.999967 | \n", "1.0 | \n", "1.0 | \n", "1.0 | \n", "... | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0.216498 | \n", "1519.550659 | \n", "1519.550659 | \n", "1523.718420 | \n", "1523.040130 | \n", "
5 rows × 46 columns
\n", "