{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Difference between map, applymap and apply methods in Pandas" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idscore
01559481562.118782
11580525413.003589
215656148997.357200
\n", "
" ], "text/plain": [ " id score\n", "0 15594815 62.118782\n", "1 15805254 13.003589\n", "2 15656148 997.357200" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Small demo frame: an integer id column and a float score column.\n", "d = {\n", "    \"id\": [15594815, 15805254, 15656148],\n", "    \"score\": [62.118782, 13.003589, 997.3572],\n", "}\n", "df_map = pd.DataFrame(d)\n", "df_map.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [], "source": [ "def score2label(x):\n", "    \"\"\"Return 1 when x is strictly greater than 500, otherwise 0.\"\"\"\n", "    return 1 if x > 500 else 0" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# map() 用法\n", "* map() 是 series 函数\n", "* map() 支持传入 lambda 表达式和函数\n", "* map() 不支持额外传参" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idscorescore1
01559481562.1187820
11580525413.0035890
215656148997.3572001
\n", "
" ], "text/plain": [ " id score score1\n", "0 15594815 62.118782 0\n", "1 15805254 13.003589 0\n", "2 15656148 997.357200 1" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Series.map calls score2label on every element of the score column.\n", "# Equivalent lambda form: df_map['score'].map(lambda x: 1 if x > 500 else 0)\n", "score_labels = df_map['score'].map(score2label)\n", "df_map['score1'] = score_labels\n", "df_map.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# apply() 用法\n", "* apply() 既可以dataframe又可以series \n", "* apply() 应用更复杂的功能" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 如额外传参(Series):" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idscorescore1score2
01559481562.11878203
11580525413.00358903
215656148997.35720014
\n", "
" ], "text/plain": [ " id score score1 score2\n", "0 15594815 62.118782 0 3\n", "1 15805254 13.003589 0 3\n", "2 15656148 997.357200 1 4" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def score2label1(x, y):\n", "    \"\"\"Return 1 + y when x is strictly greater than 500, otherwise 0 + y.\"\"\"\n", "    base = 1 if x > 500 else 0\n", "    return base + y\n", "\n", "\n", "# apply() and applymap() are pandas methods: apply() works on a Series or along one\n", "# DataFrame axis (often for aggregation), applymap() on every element.\n", "# Here Series.apply forwards the extra keyword argument y=3 to score2label1.\n", "df_map['score2'] = df_map['score'].apply(score2label1, y=3)\n", "df_map.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 行列求和(Dataframe):" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "id 4.705622e+07\n", "score 1.072480e+03\n", "score1 1.000000e+00\n", "score2 1.000000e+01\n", "dtype: float64" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_map.apply(np.sum, axis=0)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 1.559488e+07\n", "1 1.580527e+07\n", "2 1.565715e+07\n", "dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_map.apply(np.sum, axis=1)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# applymap() 用法\n", "* applymap() 应用于整个 Dataframe" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idscorescore1score2
015594815.0062.120.003.00
115805254.0013.000.003.00
215656148.00997.361.004.00
\n", "
" ], "text/plain": [ " id score score1 score2\n", "0 15594815.00 62.12 0.00 3.00\n", "1 15805254.00 13.00 0.00 3.00\n", "2 15656148.00 997.36 1.00 4.00" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Format every element as a string with two decimals. The result is bound to a new\n", "# name so df_map keeps its numeric values and this cell can be re-run safely\n", "# ('%.2f' % x raises TypeError once the values are already strings).\n", "# NOTE: DataFrame.applymap is deprecated since pandas 2.1 in favor of DataFrame.map.\n", "df_map_formatted = df_map.applymap(lambda x: '%.2f' % x)\n", "df_map_formatted.head()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }