Lists and Filtering HW

Popcorn Hack 1

import pandas as pd


def find_students_in_range(df, min_score, max_score):
    return df[(df['Score'] >= min_score) & (df['Score'] <= max_score)]

student_data = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Score': [85, 92, 78, 88]
})

find_students_in_range(student_data, 80, 90)

	Name	Score
0	Alice	85
3	Diana	88

Popcorn Hack 2

def add_letter_grades(df):
    def get_letter(score):
        if score >= 90:
            return 'A'
        elif score >= 80:
            return 'B'
        elif score >= 70:
            return 'C'
        elif score >= 60:
            return 'D'
        else:
            return 'F'
    
    df['Letter'] = df['Score'].apply(get_letter)
    return df


import pandas as pd

student_data = pd.DataFrame({
    'Name': ['Alice', 'Bob', 'Charlie', 'Diana'],
    'Score': [85, 92, 78, 59]
})

add_letter_grades(student_data)

	Name	Score	Letter
0	Alice	85	B
1	Bob	92	A
2	Charlie	78	C
3	Diana	59	F

Popcorn Hack 3

def find_mode(series):
    return series.mode().iloc[0]

import pandas as pd

find_mode(pd.Series([1, 2, 2, 3, 4, 2, 5]))

np.int64(2)

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sqlite3
import folium

# Load data
data = pd.read_csv('/home/shawnray/nighthawk/shawnr_2025/datasets/treas_parking_meters_loc_datasd.csv')
data.head()

	zone	area	sub_area	pole	config_id	config_name	date_inventory	lat	lng	sapid
0	Downtown	Core	1000 FIRST AVE	1-1004	49382	Sunday Mode	2021-01-04 00:00:00	32.715904	-117.163929	SS-000031
1	Downtown	Core - Columbia	1000 FIRST AVE	1-1004	9000	2 Hour Max $1.25 HR 8am-6pm Mon-Sat (NFC)	2018-11-11 00:00:00	32.715904	-117.163929	SS-000031
2	Downtown	Core	1000 FIRST AVE	1-1006	49382	Sunday Mode	2021-01-04 00:00:00	32.716037	-117.163930	SS-000031
3	Downtown	Core - Columbia	1000 FIRST AVE	1-1006	9000	2 Hour Max $1.25 HR 8am-6pm Mon-Sat (NFC)	2018-11-11 00:00:00	32.716037	-117.163930	SS-000031
4	Downtown	Core	1000 FIRST AVE	1-1008	49382	Sunday Mode	2021-01-04 00:00:00	32.716169	-117.163931	SS-000031

zone_counts = data['zone'].value_counts()

plt.figure(figsize=(10, 6))
zone_counts.plot(kind='bar', color='teal')
plt.title('Number of Meters per Zone')
plt.xlabel('Zone')
plt.ylabel('Number of Meters')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

postmanget

area_counts = data['area'].value_counts().sort_values(ascending=False)
sub_area_counts = data['sub_area'].value_counts().sort_values(ascending=False)

plt.figure(figsize=(12, 6))
area_counts.plot(kind='bar', color='orange')
plt.title('Number of Meters per Area')
plt.xlabel('Area')
plt.ylabel('Count')
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()

postmanget

data['date_inventory'] = pd.to_datetime(data['date_inventory'], errors='coerce')
data['inventory_year'] = data['date_inventory'].dt.year

year_counts = data['inventory_year'].value_counts().sort_index()

plt.figure(figsize=(8, 5))
year_counts.plot(kind='bar', color='purple')
plt.title('Number of Meters Installed by Year')
plt.xlabel('Year')
plt.ylabel('Number of Meters')
plt.tight_layout()
plt.show()

postmanget

# Top 10 most common configurations
top_configs = data['config_name'].value_counts().head(10)

plt.figure(figsize=(12, 6))
sns.barplot(x=top_configs.index, y=top_configs.values, palette='viridis')

plt.title('Top 10 Most Common Parking Meter Configurations')
plt.xlabel('Configuration Name')
plt.ylabel('Count')
plt.xticks(rotation=45, ha='right')  # Tilt the x-axis labels
plt.tight_layout()
plt.show()

/tmp/ipykernel_1014/91221402.py:5: FutureWarning: 

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x=top_configs.index, y=top_configs.values, palette='viridis')

postmanget

map_sd = folium.Map(location=[data['lat'].mean(), data['lng'].mean()], zoom_start=13)

for _, row in data.iterrows():
    folium.CircleMarker(
        location=[row['lat'], row['lng']],
        radius=2,
        popup=f"Pole: {row['pole']}<br>Zone: {row['zone']}",
        color='blue',
        fill=True,
        fill_opacity=0.5
    ).add_to(map_sd)

# Show in Jupyter OR save to HTML
map_sd.save('smartpark_map.html')

conn = sqlite3.connect('smartpark_inventory.db')
data.to_sql('meter_inventory', conn, if_exists='replace', index=False)

query1 = """
SELECT zone, COUNT(*) AS meter_count
FROM meter_inventory
GROUP BY zone
ORDER BY meter_count DESC;
"""
pd.read_sql(query1, conn)

	zone	meter_count
0	Downtown	6196
1	Uptown	4017
2	Mid-City	548
3	City	155
4	San Diego, CA Default Zone	48

query2 = """
SELECT config_name, MIN(date_inventory) as first_seen, MAX(date_inventory) as last_seen
FROM meter_inventory
GROUP BY config_name;
"""
pd.read_sql(query2, conn)

	config_name	first_seen	last_seen
0	1 Hour Max $1.25 HR 10am-8pm Mon-Sat (Mobile P...	2018-11-11 00:00:00	2021-03-23 00:00:00
1	1 Hour Max $1.25 HR 8am-4pm Mon-Fri 8am-6pm Sa...	2018-11-11 00:00:00	2018-11-11 00:00:00
2	1 Hour Max $1.25 HR 8am-4pm Mon-Sat (NFC)	2018-11-11 00:00:00	2018-11-11 00:00:00
3	1 Hour Max $1.25 HR 8am-5pm Mon-Sat, No Parkin...	2018-11-11 00:00:00	2018-11-11 00:00:00
4	1 Hour Max $1.25 HR 8am-6pm Mon-Sat (Mobile Pa...	2018-11-11 00:00:00	2020-02-02 00:00:00
...	...	...	...
105	MSPM-PBS 2 Hour Max $1.25 HR 10am-8pm Mon-Sat	2019-12-29 00:00:00	2019-12-29 00:00:00
106	San Diego Default	2018-11-11 00:00:00	2019-12-29 00:00:00
107	Single Space 2 hour meters Petco Special Event...	2019-12-29 00:00:00	2022-09-12 00:00:00
108	Single Space 30 min meters Petco Special Event...	2019-12-29 00:00:00	2019-12-29 00:00:00
109	Sunday Mode	2021-01-04 00:00:00	2021-01-04 00:00:00

110 rows × 3 columns

Comparison: SQL vs Pandas

Pandas Pros: Easy chaining, good for rapid prototyping, integrates with plotting libraries.
SQL Pros: Declarative, familiar to database users, better for large structured queries.
Pandas Cons: Can get messy with complex filters or joins.
SQL Cons: Harder to visualize data, more verbose syntax for basic operations.

In this project, Pandas was great for quick aggregation and plotting, while SQL was useful for queries like counting meters per zone or filtering by date.