{ "cells": [ { "cell_type": "code", "execution_count": 56, "id": "7a77604b-97f1-4652-b929-8f66cd4a3b32", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patron Type CodePatron Type DefinitionTotal CheckoutsTotal RenewalsAge RangeHome Library CodeHome Library DefinitionCirculation Active MonthCirculation Active YearNotice Preference CodeNotice Preference DefinitionProvided Email AddressYear Patron RegisteredWithin San Francisco County
05Staff5315NaNb2BayviewMar2023.0zEmailTrue2003False
15Staff480378NaNe9ExcelsiorJun2023.0zEmailTrue2003False
25Staff703345 to 54 yearsn4Noe ValleyJan2023.0zEmailTrue2011False
35Staff39342140NaNo2Ocean ViewJul2023.0zEmailTrue2003False
45Staff11181035NaNo7OrtegaJul2023.0zEmailTrue2003False
.............................................
4362850Adult2635 to 44 yearsyjjBookmobileOct2022.0zEmailTrue2020NaN
4362862Teen3010 to 19 yearsyjjBookmobileNov2015.0zEmailTrue2011NaN
4362870Adult6060 to 64 yearsylwBookmobileMar2022.0zEmailTrue2022NaN
43628815Teacher Card4035 to 44 yearsylwBookmobileSep2020.0zEmailTrue2019NaN
43628955Retired Staff30910175 years and overylwBookmobileJul2023.0zEmailTrue2003NaN
\n", "

436290 rows × 14 columns

\n", "
" ], "text/plain": [ " Patron Type Code Patron Type Definition Total Checkouts \\\n", "0 5 Staff 53 \n", "1 5 Staff 480 \n", "2 5 Staff 70 \n", "3 5 Staff 3934 \n", "4 5 Staff 1118 \n", "... ... ... ... \n", "436285 0 Adult 2 \n", "436286 2 Teen 3 \n", "436287 0 Adult 6 \n", "436288 15 Teacher Card 4 \n", "436289 55 Retired Staff 309 \n", "\n", " Total Renewals Age Range Home Library Code \\\n", "0 15 NaN b2 \n", "1 378 NaN e9 \n", "2 33 45 to 54 years n4 \n", "3 2140 NaN o2 \n", "4 1035 NaN o7 \n", "... ... ... ... \n", "436285 6 35 to 44 years yjj \n", "436286 0 10 to 19 years yjj \n", "436287 0 60 to 64 years ylw \n", "436288 0 35 to 44 years ylw \n", "436289 101 75 years and over ylw \n", "\n", " Home Library Definition Circulation Active Month \\\n", "0 Bayview Mar \n", "1 Excelsior Jun \n", "2 Noe Valley Jan \n", "3 Ocean View Jul \n", "4 Ortega Jul \n", "... ... ... \n", "436285 Bookmobile Oct \n", "436286 Bookmobile Nov \n", "436287 Bookmobile Mar \n", "436288 Bookmobile Sep \n", "436289 Bookmobile Jul \n", "\n", " Circulation Active Year Notice Preference Code \\\n", "0 2023.0 z \n", "1 2023.0 z \n", "2 2023.0 z \n", "3 2023.0 z \n", "4 2023.0 z \n", "... ... ... \n", "436285 2022.0 z \n", "436286 2015.0 z \n", "436287 2022.0 z \n", "436288 2020.0 z \n", "436289 2023.0 z \n", "\n", " Notice Preference Definition Provided Email Address \\\n", "0 Email True \n", "1 Email True \n", "2 Email True \n", "3 Email True \n", "4 Email True \n", "... ... ... \n", "436285 Email True \n", "436286 Email True \n", "436287 Email True \n", "436288 Email True \n", "436289 Email True \n", "\n", " Year Patron Registered Within San Francisco County \n", "0 2003 False \n", "1 2003 False \n", "2 2011 False \n", "3 2003 False \n", "4 2003 False \n", "... ... ... 
\n", "436285 2020 NaN \n", "436286 2011 NaN \n", "436287 2022 NaN \n", "436288 2019 NaN \n", "436289 2003 NaN \n", "\n", "[436290 rows x 14 columns]" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "df = pd.read_csv(\"../data/Library_Usage.csv\",\n", " low_memory=False\n", " )\n", "#eine erste Übersicht verschaffen:\n", "df" ] }, { "cell_type": "code", "execution_count": 3, "id": "ca23bd32-ecfb-4ea4-aaae-01c08e37ebfc", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "14" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 1. Wie viele Merkmale besitzt der Datensatz? Die Merkmale entsprechen den Spalten\n", "len(df.columns)" ] }, { "cell_type": "code", "execution_count": 4, "id": "11ad2e54-ad95-4f80-a533-d1700c3b6f0a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "436290" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2. Wie groß ist die Stichprobengröße des Datensatzes?\n", "len(df)" ] }, { "cell_type": "code", "execution_count": 5, "id": "b664e067-47f6-4ac1-bd95-a2862d24adb6", "metadata": {}, "outputs": [], "source": [ "# 3. Wer oder was sind die Merkmalsträger? " ] }, { "cell_type": "markdown", "id": "7a96c441-f9dd-4b1d-aec3-563478b4e484", "metadata": {}, "source": [ " Bibliothekskunden der SF Public Library." ] }, { "cell_type": "code", "execution_count": 6, "id": "ef440c74-68b2-44e3-a7b1-3c028da134a5", "metadata": {}, "outputs": [], "source": [ "# 4. Von wann bis wann wurden die Daten erhoben?" ] }, { "cell_type": "markdown", "id": "832ab1d7-36e1-48e5-ba53-caf9af4037ac", "metadata": {}, "source": [ "Aus dem Datensatz lässt sich herauslesen, dass die ersten Nutzerregistrierungen 2003 waren (Einführung Bibliothekssystem), die letzten 2023. 
Der Datensatz selbst wurde zuletzt 2023 aktualisiert (laut Webseite).\n", "\n", "Weiterführende Informationen sind auf der Webseite verfügbar: https://data.sfgov.org/Culture-and-Recreation/Library-Usage/qzz6-2jup/about_data" ] }, { "cell_type": "code", "execution_count": 7, "id": "d5b4fc74-4468-4cb4-bcba-0f63e7cb7719", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2022 0.119583\n", "2019 0.101515\n", "2003 0.094052\n", "2021 0.074799\n", "2020 0.073369\n", "2017 0.067203\n", "2018 0.065603\n", "2023 0.060249\n", "2016 0.049781\n", "2015 0.046201\n", "2014 0.036593\n", "2013 0.030553\n", "2012 0.029066\n", "2011 0.028348\n", "2009 0.026109\n", "2010 0.025779\n", "2008 0.023154\n", "2007 0.014862\n", "2006 0.012042\n", "2005 0.010910\n", "2004 0.010229\n", "Name: Year Patron Registered, dtype: float64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Year Patron Registered'].value_counts(normalize=True)" ] }, { "cell_type": "code", "execution_count": 8, "id": "9ea4b26b-6139-4698-948a-13ce6ac985f4", "metadata": {}, "outputs": [], "source": [ "# 5. Wie lässt sich die Grundgesamtheit beschreiben? Handelt es sich um eine Vollerhebung?" ] }, { "cell_type": "markdown", "id": "9410c2a5-c07d-4768-aa05-1906d777f114", "metadata": {}, "source": [ "Grundgesamtheit sind alle Bibliothekskunden der San Francisco Library. Wahrscheinlich handelt es sich um eine Vollerhebung. Es lässt sich diskutieren, ob die Grundgesamtheit größer gefasst werden kann (z.B. alle Kunden von Bibliotheken in den USA oder alle Kunden von öffentlichen Bibliotheken). Was meinst Du: sind die Daten repräsentativ für diese Grundgesamtheiten?" ] }, { "cell_type": "code", "execution_count": 9, "id": "1f293941-1899-45cb-842a-ea12ab9e4da9", "metadata": {}, "outputs": [], "source": [ "# 6. Welche Merkmale sind stetig? 
Welche diskret?\n", "#\n", "#Aus der Beschreibung der Merkmale des Datensatzes, aber auch einfach bereits mit\n", "#df\n", "#lässt sich erkennen, dass nur die Variablen 'Total Checkouts' und 'Total Renewals' stetig sind. \n", "#Alle anderen sind diskret" ] }, { "cell_type": "code", "execution_count": 10, "id": "c381957c-c4f7-4c94-85f0-81d29e802ebf", "metadata": {}, "outputs": [], "source": [ "# 7. Welchem Skalenniveau entsprechen die einzelnen Merkmale (Nominal-, Ordinal- oder metrische Skala)? \n", "#\n", "# Siehe wieder \n", "#df" ] }, { "cell_type": "code", "execution_count": 11, "id": "1f92e797-4613-402c-aed1-5336d4037d57", "metadata": {}, "outputs": [], "source": [ "# Ordinal: 'Age Range'" ] }, { "cell_type": "code", "execution_count": 12, "id": "96bccd59-35b6-4ba0-b1d1-28e36cd65c33", "metadata": {}, "outputs": [], "source": [ "# Metrisch: 'Total Checkouts', 'Total Renewals', 'Circulation Active Year', 'Year Patron Registered'" ] }, { "cell_type": "code", "execution_count": 13, "id": "d4e121f7-5d4c-4505-a205-a73b8c02347e", "metadata": {}, "outputs": [], "source": [ "# Nominal: 'Patron Type Code', 'Patron Type Definition', 'Home Library Code', \n", "# 'Home Library Definition', 'Circulation Active Month', 'Notice Preference Code', \n", "# 'Notice Preference Definition', 'Provided Email Address', 'Within San Francisco County'\n" ] }, { "cell_type": "code", "execution_count": 14, "id": "b142bd18-a0a3-488e-91f0-786c49ebc28c", "metadata": {}, "outputs": [], "source": [ "# 8. Enthält der Datensatz fehlende Werte? Ja, z.B. 'Circulation Active Year'" ] }, { "cell_type": "code", "execution_count": 15, "id": "b14365da-88c7-4ca4-bad3-f9f96b371f1b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patron Type CodePatron Type DefinitionTotal CheckoutsTotal RenewalsAge RangeHome Library CodeHome Library DefinitionCirculation Active MonthCirculation Active YearNotice Preference CodeNotice Preference DefinitionProvided Email AddressYear Patron RegisteredWithin San Francisco County
451Juvenile000 to 9 yearsxMainNaNNaNzEmailTrue2021False
642Teen0010 to 19 yearsxMainNaNNaNzEmailTrue2021False
653Senior0075 years and overm2MarinaNaNNaNzEmailTrue2021False
7210Visitor0025 to 34 yearsxMainNaNNaNzEmailTrue2023False
861Juvenile000 to 9 yearsxMainNaNNaNzEmailTrue2023False
.............................................
4362622Teen0010 to 19 yearsxMainNaNNaNNaNNoneFalse2023NaN
4362682Teen0010 to 19 yearsxMainNaNNaNNaNNoneFalse2023NaN
4362730Adult0035 to 44 yearsxMainNaNNaNNaNNoneFalse2021NaN
4362752Teen0010 to 19 yearsxMainNaNNaNNaNNoneFalse2023NaN
4362800Adult0025 to 34 yearsxMainNaNNaNNaNNoneFalse2023NaN
\n", "

39513 rows × 14 columns

\n", "
" ], "text/plain": [ " Patron Type Code Patron Type Definition Total Checkouts \\\n", "45 1 Juvenile 0 \n", "64 2 Teen 0 \n", "65 3 Senior 0 \n", "72 10 Visitor 0 \n", "86 1 Juvenile 0 \n", "... ... ... ... \n", "436262 2 Teen 0 \n", "436268 2 Teen 0 \n", "436273 0 Adult 0 \n", "436275 2 Teen 0 \n", "436280 0 Adult 0 \n", "\n", " Total Renewals Age Range Home Library Code \\\n", "45 0 0 to 9 years x \n", "64 0 10 to 19 years x \n", "65 0 75 years and over m2 \n", "72 0 25 to 34 years x \n", "86 0 0 to 9 years x \n", "... ... ... ... \n", "436262 0 10 to 19 years x \n", "436268 0 10 to 19 years x \n", "436273 0 35 to 44 years x \n", "436275 0 10 to 19 years x \n", "436280 0 25 to 34 years x \n", "\n", " Home Library Definition Circulation Active Month \\\n", "45 Main NaN \n", "64 Main NaN \n", "65 Marina NaN \n", "72 Main NaN \n", "86 Main NaN \n", "... ... ... \n", "436262 Main NaN \n", "436268 Main NaN \n", "436273 Main NaN \n", "436275 Main NaN \n", "436280 Main NaN \n", "\n", " Circulation Active Year Notice Preference Code \\\n", "45 NaN z \n", "64 NaN z \n", "65 NaN z \n", "72 NaN z \n", "86 NaN z \n", "... ... ... \n", "436262 NaN NaN \n", "436268 NaN NaN \n", "436273 NaN NaN \n", "436275 NaN NaN \n", "436280 NaN NaN \n", "\n", " Notice Preference Definition Provided Email Address \\\n", "45 Email True \n", "64 Email True \n", "65 Email True \n", "72 Email True \n", "86 Email True \n", "... ... ... \n", "436262 None False \n", "436268 None False \n", "436273 None False \n", "436275 None False \n", "436280 None False \n", "\n", " Year Patron Registered Within San Francisco County \n", "45 2021 False \n", "64 2021 False \n", "65 2021 False \n", "72 2023 False \n", "86 2023 False \n", "... ... ... 
\n", "436262 2023 NaN \n", "436268 2023 NaN \n", "436273 2021 NaN \n", "436275 2023 NaN \n", "436280 2023 NaN \n", "\n", "[39513 rows x 14 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df[df['Circulation Active Year'].isna()]" ] }, { "cell_type": "code", "execution_count": 17, "id": "e4ae3d65-3ac8-4bf2-922a-41b14a23ede1", "metadata": {}, "outputs": [], "source": [ "# 9. Handelt es sich um Querschnitts-, Längsschnitts- oder Paneldaten? \n", "#\n", "# Querschnittsdaten" ] }, { "cell_type": "code", "execution_count": 99, "id": "aef364f3-48f3-405c-84f6-e370e2b76481", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Patron Type CodePatron Type DefinitionTotal CheckoutsTotal RenewalsAge RangeHome Library CodeHome Library DefinitionCirculation Active MonthCirculation Active YearNotice Preference CodeNotice Preference DefinitionProvided Email AddressYear Patron RegisteredWithin San Francisco County
314Welcome1120 to 24 yearsg4Glen ParkAug2022.0zEmailTrue2022False
3710Visitor3320 to 24 yearsp5PortolaAug2022.0zEmailTrue2022False
4110Visitor3320 to 24 yearsg6Golden Gate ValleyNov2022.0zEmailTrue2022False
560Adult5420 to 24 yearsxMainMay2022.0zEmailTrue2021False
5710Visitor6120 to 24 yearsxMainJul2023.0zEmailTrue2023False
.............................................
4362150Adult0020 to 24 yearsxMainNaNNaNzEmailTrue2023NaN
43624016Digital Access Card0020 to 24 yearsxMainJun2023.0NaNNoneFalse2023NaN
4362454Welcome0020 to 24 yearsxMainAug2021.0NaNNoneFalse2021NaN
4362670Adult0020 to 24 yearsxMainMay2023.0NaNNoneFalse2023NaN
43627416Digital Access Card0020 to 24 yearsxMainDec2021.0NaNNoneFalse2021NaN
\n", "

25381 rows × 14 columns

\n", "
" ], "text/plain": [ " Patron Type Code Patron Type Definition Total Checkouts \\\n", "31 4 Welcome 1 \n", "37 10 Visitor 3 \n", "41 10 Visitor 3 \n", "56 0 Adult 5 \n", "57 10 Visitor 6 \n", "... ... ... ... \n", "436215 0 Adult 0 \n", "436240 16 Digital Access Card 0 \n", "436245 4 Welcome 0 \n", "436267 0 Adult 0 \n", "436274 16 Digital Access Card 0 \n", "\n", " Total Renewals Age Range Home Library Code \\\n", "31 1 20 to 24 years g4 \n", "37 3 20 to 24 years p5 \n", "41 3 20 to 24 years g6 \n", "56 4 20 to 24 years x \n", "57 1 20 to 24 years x \n", "... ... ... ... \n", "436215 0 20 to 24 years x \n", "436240 0 20 to 24 years x \n", "436245 0 20 to 24 years x \n", "436267 0 20 to 24 years x \n", "436274 0 20 to 24 years x \n", "\n", " Home Library Definition Circulation Active Month \\\n", "31 Glen Park Aug \n", "37 Portola Aug \n", "41 Golden Gate Valley Nov \n", "56 Main May \n", "57 Main Jul \n", "... ... ... \n", "436215 Main NaN \n", "436240 Main Jun \n", "436245 Main Aug \n", "436267 Main May \n", "436274 Main Dec \n", "\n", " Circulation Active Year Notice Preference Code \\\n", "31 2022.0 z \n", "37 2022.0 z \n", "41 2022.0 z \n", "56 2022.0 z \n", "57 2023.0 z \n", "... ... ... \n", "436215 NaN z \n", "436240 2023.0 NaN \n", "436245 2021.0 NaN \n", "436267 2023.0 NaN \n", "436274 2021.0 NaN \n", "\n", " Notice Preference Definition Provided Email Address \\\n", "31 Email True \n", "37 Email True \n", "41 Email True \n", "56 Email True \n", "57 Email True \n", "... ... ... \n", "436215 Email True \n", "436240 None False \n", "436245 None False \n", "436267 None False \n", "436274 None False \n", "\n", " Year Patron Registered Within San Francisco County \n", "31 2022 False \n", "37 2022 False \n", "41 2022 False \n", "56 2021 False \n", "57 2023 False \n", "... ... ... 
\n", "436215 2023 NaN \n", "436240 2023 NaN \n", "436245 2021 NaN \n", "436267 2023 NaN \n", "436274 2021 NaN \n", "\n", "[25381 rows x 14 columns]" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "row_filter = (df['Age Range'] == '20 to 24 years') \n", "df.loc[row_filter]\n" ] }, { "cell_type": "code", "execution_count": 97, "id": "c5dd9fc7-b4cc-4ac2-b4c2-31c5c217bc0d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Adult 274682\n", "Juvenile 53281\n", "Senior 49332\n", "Teen 40561\n", "Welcome 9966\n", "Digital Access Card 3714\n", "Teacher Card 3234\n", "Staff 806\n", "Retired Staff 215\n", "Visitor 148\n", "Library By Mail 120\n", "At User Adult 118\n", "At User Senior 78\n", "At User Welcome 13\n", "At User Teen 8\n", "Business 7\n", "At User Juvenile 7\n", "Name: Patron Type Definition, dtype: int64" ] }, "execution_count": 97, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Patron Type Definition'].value_counts()" ] }, { "cell_type": "code", "execution_count": 98, "id": "b6878b7b-5663-4aa3-989f-a4b9b96b9024", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "25 to 34 years 92669\n", "35 to 44 years 81719\n", "10 to 19 years 66857\n", "45 to 54 years 48319\n", "0 to 9 years 32692\n", "65 to 74 years 31714\n", "20 to 24 years 25381\n", "75 years and over 19297\n", "55 to 59 years 19076\n", "60 to 64 years 17654\n", "Name: Age Range, dtype: int64" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['Age Range'].value_counts()" ] }, { "cell_type": "code", "execution_count": 88, "id": "56778225-7e12-402a-abe9-4b58f71ad714", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Notice Preference DefinitionEmailNoneAll
Patron Type Definition
Adult0.6577990.3714670.629586
At User Adult0.0002520.0004420.000270
At User Juvenile0.0000130.0000470.000016
At User Senior0.0001450.0004880.000179
At User Teen0.0000200.0000000.000018
At User Welcome0.0000280.0000470.000030
Business0.0000180.0000000.000016
Digital Access Card0.0050390.0402890.008513
Juvenile0.1144110.1926770.122123
Library By Mail0.0002570.0004420.000275
Retired Staff0.0005110.0003260.000493
Senior0.1040530.1955850.113072
Staff0.0020140.0003260.001847
Teacher Card0.0079200.0027680.007413
Teen0.0855910.1604600.092968
Visitor0.0003560.0001860.000339
Welcome0.0215740.0344510.022843
\n", "
" ], "text/plain": [ "Notice Preference Definition Email None All\n", "Patron Type Definition \n", "Adult 0.657799 0.371467 0.629586\n", "At User Adult 0.000252 0.000442 0.000270\n", "At User Juvenile 0.000013 0.000047 0.000016\n", "At User Senior 0.000145 0.000488 0.000179\n", "At User Teen 0.000020 0.000000 0.000018\n", "At User Welcome 0.000028 0.000047 0.000030\n", "Business 0.000018 0.000000 0.000016\n", "Digital Access Card 0.005039 0.040289 0.008513\n", "Juvenile 0.114411 0.192677 0.122123\n", "Library By Mail 0.000257 0.000442 0.000275\n", "Retired Staff 0.000511 0.000326 0.000493\n", "Senior 0.104053 0.195585 0.113072\n", "Staff 0.002014 0.000326 0.001847\n", "Teacher Card 0.007920 0.002768 0.007413\n", "Teen 0.085591 0.160460 0.092968\n", "Visitor 0.000356 0.000186 0.000339\n", "Welcome 0.021574 0.034451 0.022843" ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "\n", "\n", "pd.crosstab(\n", " df['Patron Type Definition'],\n", " df['Notice Preference Definition'],\n", " margins=True,\n", " normalize=1\n", ")\n" ] }, { "cell_type": "code", "execution_count": 89, "id": "70ce8f8e-27d3-480d-9383-23bf07e883b6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Notice Preference DefinitionEmailNoneAll
Age Range
0 to 9 years0.0732280.0921130.075089
10 to 19 years0.1399740.2778530.153561
20 to 24 years0.0578410.0624650.058296
25 to 34 years0.2247280.1041630.212847
35 to 44 years0.1977660.0955850.187697
45 to 54 years0.1150780.0735130.110982
55 to 59 years0.0441710.0405560.043815
60 to 64 years0.0395920.0492960.040549
65 to 74 years0.0692280.1059110.072842
75 years and over0.0383950.0985460.044322
\n", "
" ], "text/plain": [ "Notice Preference Definition Email None All\n", "Age Range \n", "0 to 9 years 0.073228 0.092113 0.075089\n", "10 to 19 years 0.139974 0.277853 0.153561\n", "20 to 24 years 0.057841 0.062465 0.058296\n", "25 to 34 years 0.224728 0.104163 0.212847\n", "35 to 44 years 0.197766 0.095585 0.187697\n", "45 to 54 years 0.115078 0.073513 0.110982\n", "55 to 59 years 0.044171 0.040556 0.043815\n", "60 to 64 years 0.039592 0.049296 0.040549\n", "65 to 74 years 0.069228 0.105911 0.072842\n", "75 years and over 0.038395 0.098546 0.044322" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(\n", " df['Age Range'],\n", " df['Notice Preference Definition'],\n", " margins=True,\n", " normalize=1\n", ")" ] }, { "cell_type": "code", "execution_count": 100, "id": "27703beb-b40d-43cb-967d-42bae242af4f", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Age Range0 to 9 years10 to 19 years20 to 24 years25 to 34 years35 to 44 years45 to 54 years55 to 59 years60 to 64 years65 to 74 years75 years and overAll
Patron Type Definition
Adult0.0011930.0813230.9393640.9341530.9448110.9506610.9525580.9582530.0137160.0010360.630652
At User Adult0.0000000.0000450.0001970.0000860.0003180.0006000.0010480.0013590.0000000.0000000.000264
At User Juvenile0.0001220.0000450.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000016
At User Senior0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0015140.0015550.000179
At User Teen0.0000000.0001200.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.000018
At User Welcome0.0000000.0000150.0000390.0000540.0000370.0000410.0000000.0000000.0000000.0000520.000030
Business0.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000000.0000320.0000000.000002
Digital Access Card0.0000920.0008970.0105980.0128740.0142680.0105550.0089640.0070240.0040990.0024870.008439
Juvenile0.9976140.3088530.0000390.0000110.0000240.0000000.0000520.0000000.0000000.0000000.122349
Library By Mail0.0000000.0000000.0001180.0000860.0001100.0001030.0003150.0002270.0008200.0030570.000276
Retired Staff0.0000000.0000000.0000000.0000110.0000120.0000210.0005770.0007360.0024280.0018660.000322
Senior0.0002140.0000000.0000000.0000000.0000000.0000210.0001050.0009060.9579050.9803080.113285
Staff0.0000310.0000000.0004730.0007880.0012110.0024630.0024640.0011900.0009460.0002070.000933
Teacher Card0.0001530.0000300.0018910.0085030.0111360.0137830.0127390.0092330.0047300.0013470.006893
Teen0.0000310.6064740.0000790.0000000.0000120.0000000.0000000.0000000.0000000.0000000.093140
Visitor0.0000000.0000900.0011030.0004750.0003180.0003310.0002620.0003970.0003780.0002070.000340
Welcome0.0005510.0021090.0460970.0429590.0277410.0214200.0209160.0206750.0134330.0078770.022863
\n", "
" ], "text/plain": [ "Age Range 0 to 9 years 10 to 19 years 20 to 24 years \\\n", "Patron Type Definition \n", "Adult 0.001193 0.081323 0.939364 \n", "At User Adult 0.000000 0.000045 0.000197 \n", "At User Juvenile 0.000122 0.000045 0.000000 \n", "At User Senior 0.000000 0.000000 0.000000 \n", "At User Teen 0.000000 0.000120 0.000000 \n", "At User Welcome 0.000000 0.000015 0.000039 \n", "Business 0.000000 0.000000 0.000000 \n", "Digital Access Card 0.000092 0.000897 0.010598 \n", "Juvenile 0.997614 0.308853 0.000039 \n", "Library By Mail 0.000000 0.000000 0.000118 \n", "Retired Staff 0.000000 0.000000 0.000000 \n", "Senior 0.000214 0.000000 0.000000 \n", "Staff 0.000031 0.000000 0.000473 \n", "Teacher Card 0.000153 0.000030 0.001891 \n", "Teen 0.000031 0.606474 0.000079 \n", "Visitor 0.000000 0.000090 0.001103 \n", "Welcome 0.000551 0.002109 0.046097 \n", "\n", "Age Range 25 to 34 years 35 to 44 years 45 to 54 years \\\n", "Patron Type Definition \n", "Adult 0.934153 0.944811 0.950661 \n", "At User Adult 0.000086 0.000318 0.000600 \n", "At User Juvenile 0.000000 0.000000 0.000000 \n", "At User Senior 0.000000 0.000000 0.000000 \n", "At User Teen 0.000000 0.000000 0.000000 \n", "At User Welcome 0.000054 0.000037 0.000041 \n", "Business 0.000000 0.000000 0.000000 \n", "Digital Access Card 0.012874 0.014268 0.010555 \n", "Juvenile 0.000011 0.000024 0.000000 \n", "Library By Mail 0.000086 0.000110 0.000103 \n", "Retired Staff 0.000011 0.000012 0.000021 \n", "Senior 0.000000 0.000000 0.000021 \n", "Staff 0.000788 0.001211 0.002463 \n", "Teacher Card 0.008503 0.011136 0.013783 \n", "Teen 0.000000 0.000012 0.000000 \n", "Visitor 0.000475 0.000318 0.000331 \n", "Welcome 0.042959 0.027741 0.021420 \n", "\n", "Age Range 55 to 59 years 60 to 64 years 65 to 74 years \\\n", "Patron Type Definition \n", "Adult 0.952558 0.958253 0.013716 \n", "At User Adult 0.001048 0.001359 0.000000 \n", "At User Juvenile 0.000000 0.000000 0.000000 \n", "At User Senior 0.000000 0.000000 0.001514 
\n", "At User Teen 0.000000 0.000000 0.000000 \n", "At User Welcome 0.000000 0.000000 0.000000 \n", "Business 0.000000 0.000000 0.000032 \n", "Digital Access Card 0.008964 0.007024 0.004099 \n", "Juvenile 0.000052 0.000000 0.000000 \n", "Library By Mail 0.000315 0.000227 0.000820 \n", "Retired Staff 0.000577 0.000736 0.002428 \n", "Senior 0.000105 0.000906 0.957905 \n", "Staff 0.002464 0.001190 0.000946 \n", "Teacher Card 0.012739 0.009233 0.004730 \n", "Teen 0.000000 0.000000 0.000000 \n", "Visitor 0.000262 0.000397 0.000378 \n", "Welcome 0.020916 0.020675 0.013433 \n", "\n", "Age Range 75 years and over All \n", "Patron Type Definition \n", "Adult 0.001036 0.630652 \n", "At User Adult 0.000000 0.000264 \n", "At User Juvenile 0.000000 0.000016 \n", "At User Senior 0.001555 0.000179 \n", "At User Teen 0.000000 0.000018 \n", "At User Welcome 0.000052 0.000030 \n", "Business 0.000000 0.000002 \n", "Digital Access Card 0.002487 0.008439 \n", "Juvenile 0.000000 0.122349 \n", "Library By Mail 0.003057 0.000276 \n", "Retired Staff 0.001866 0.000322 \n", "Senior 0.980308 0.113285 \n", "Staff 0.000207 0.000933 \n", "Teacher Card 0.001347 0.006893 \n", "Teen 0.000000 0.093140 \n", "Visitor 0.000207 0.000340 \n", "Welcome 0.007877 0.022863 " ] }, "execution_count": 100, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(\n", " df['Patron Type Definition'],\n", " df['Age Range'],\n", " margins=True,\n", " normalize=1\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "1997eb97-92f5-4571-b5e3-2cddb2f87c4d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" } }, "nbformat": 4, "nbformat_minor": 5 }