{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Library usage: membership duration feature\n",
    "\n",
    "Loads `Library_Usage.csv`, converts the circulation-activity year and month columns to numbers, and derives a `Membership Duration` column (in months).\n",
    "\n",
    "The notebook is written to survive **Restart Kernel → Run All**: the raw frame (`usage_raw`) is never modified, and every transform cell rebuilds its result from it, so any cell can be re-run safely."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Location of the input file, relative to this notebook.\n",
    "DATA_PATH = Path(\"../data/Library_Usage.csv\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# low_memory=False makes pandas read the file in one pass instead of\n",
    "# inferring dtypes chunk by chunk.\n",
    "usage_raw = pd.read_csv(DATA_PATH, low_memory=False)\n",
    "usage_raw.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "usage_raw.info()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Clean the circulation-activity columns\n",
    "\n",
    "- `Circulation Active Year` is coerced to a number; values that cannot be parsed become `NaN`.\n",
    "- `Circulation Active Month` is parsed with the `%b` format (abbreviated month names such as `Mar`) and reduced to its month number; values that cannot be parsed become `NaN`.\n",
    "\n",
    "Both columns are parsed from `usage_raw` in a single cell. Parsing the month in two separate in-place steps is not re-runnable: a second pass would feed month numbers to the `%b` parser and silently turn the whole column into missing values."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "active_year = pd.to_numeric(\n",
    "    usage_raw[\"Circulation Active Year\"],\n",
    "    errors=\"coerce\",\n",
    ")\n",
    "active_month = pd.to_datetime(\n",
    "    usage_raw[\"Circulation Active Month\"],\n",
    "    errors=\"coerce\",\n",
    "    format=\"%b\",\n",
    ").dt.month\n",
    "\n",
    "# Work on a copy so the raw frame stays available for re-runs.\n",
    "df = usage_raw.copy()\n",
    "df[\"Circulation Active Year\"] = active_year\n",
    "df[\"Circulation Active Month\"] = active_month\n",
    "df[[\"Circulation Active Month\", \"Circulation Active Year\"]].head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Membership duration\n",
    "\n",
    "`Membership Duration = (Circulation Active Year - Year Patron Registered) * 12 + Circulation Active Month`\n",
    "\n",
    "Notes:\n",
    "\n",
    "- Only a registration *year* is used, so the value counts months from the start of the registration year.\n",
    "- Rows with a missing activity year or month produce `NaN`, which is then replaced by `0`. **TODO:** confirm that `0` (rather than a missing value) is the intended encoding for patrons with no recorded activity."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "membership_months = (\n",
    "    df[\"Circulation Active Year\"] - df[\"Year Patron Registered\"]\n",
    ") * 12 + df[\"Circulation Active Month\"]\n",
    "\n",
    "df[\"Membership Duration\"] = membership_months.fillna(0)\n",
    "df[\"Membership Duration\"].head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Sanity checks on the derived columns.\n",
    "parsed_months = df[\"Circulation Active Month\"].dropna()\n",
    "assert parsed_months.between(1, 12).all(), \"month numbers must be in 1..12\"\n",
    "assert df[\"Membership Duration\"].notna().all(), \"missing durations must be filled\"\n",
    "assert len(df) == len(usage_raw), \"transforms must not add or drop rows\""
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Result"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.head()"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}