1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import requests
from io import StringIO
# Fetch the raw CSV over HTTP.
url = "https://hebbkx1anhila5yf.public.blob.vercel-storage.com/NY%202016%20Party%20Locations-gANytICbCtxX5hfeeGk21AnEZX6Hee.csv"
# A timeout keeps the script from hanging indefinitely on a stalled connection.
response = requests.get(url, timeout=30)
# Stop on an HTTP error status instead of feeding an error page to the CSV parser.
response.raise_for_status()
data = pd.read_csv(StringIO(response.text))
# Convert 'num_calls' to numeric; values that cannot be parsed become NaN.
data['num_calls'] = pd.to_numeric(data['num_calls'], errors='coerce')
# Select matplotlib's 'default' style sheet.
plt.style.use('default')
# Four panels on a 2x2 grid; axs is indexed as axs[row, col].
fig, axs = plt.subplots(nrows=2, ncols=2, figsize=(24, 24))
fig.suptitle('New York 2016 Party Locations Analysis', fontsize=16)
# 1. Distribution of party locations by borough
# Panel (0, 0): one bar per borough; bar height is the number of rows
# carrying that borough value.
borough_counts = data['Borough'].value_counts()
borough_ax = axs[0, 0]
borough_ax.bar(borough_counts.index, borough_counts.values)
borough_ax.set(
    title='Distribution of Party Locations by Borough',
    xlabel='Borough',
    ylabel='Number of Locations',
)
borough_ax.tick_params(axis='x', rotation=45)
# 2. Top 10 zip codes with the most party calls
# Panel (0, 1): total num_calls per zip code, ten largest totals only.
calls_per_zip = data.groupby('Incident Zip')['num_calls'].sum()
top_zips = calls_per_zip.sort_values(ascending=False).head(10)
zip_ax = axs[0, 1]
# The zip codes are cast to str before plotting; matplotlib places string
# x-values as evenly spaced categories.
zip_ax.bar(top_zips.index.astype(str), top_zips.values)
zip_ax.set(
    title='Top 10 Zip Codes with Most Party Calls',
    xlabel='Zip Code',
    ylabel='Number of Calls',
)
zip_ax.tick_params(axis='x', rotation=45)
# 3. Pie chart of all zip codes by number of parties
# Number of rows recorded for each zip code.
zip_counts = data['Incident Zip'].value_counts()
pie_data = zip_counts
# One viridis colour per zip code, sampled evenly across the colormap.
colors = plt.cm.viridis(np.linspace(0, 1, num=len(pie_data)))
# Largest counts first.
pie_data_sorted = pie_data.sort_values(ascending=False)
# Each zip code's share of all rows, in percent.
total = pie_data_sorted.sum()
percentages = pie_data_sorted.div(total).mul(100)
# Function to create labels - only show if >= 1%
def make_labels(percentages):
    """Build one pie-wedge label per entry, blanking slices below 1%.

    Args:
        percentages: pandas Series mapping a label (the index) to that
            slice's share of the total, in percent.

    Returns:
        A list of strings in the same order as ``percentages``: the index
        label rendered as text where the share is at least 1, otherwise ''.
    """
    # items() yields (index label, value) pairs, so no positional lookup
    # back into the index is needed.
    return [f'{label}' if pct >= 1 else '' for label, pct in percentages.items()]
def make_autopct(percentages):
    """Return a formatter for pie-wedge percentages that hides small slices.

    Args:
        percentages: Not read by this function; the parameter is kept so
            existing callers can continue to pass it.

    Returns:
        A function taking a percentage ``pct`` and returning it formatted
        with one decimal place and a trailing '%', or '' when ``pct`` is
        below 1.
    """
    def my_autopct(pct):
        return f'{pct:.1f}%' if pct >= 1 else ''

    return my_autopct
# Draw the pie on panel (1, 0); labeldistance > 1 puts the zip-code labels
# outside the wedges, pctdistance < 1 puts the percentages inside them.
donut_ax = axs[1, 0]
wedges, texts, autotexts = donut_ax.pie(
    pie_data_sorted.values,
    labels=make_labels(percentages),
    labeldistance=1.1,
    colors=colors,
    autopct=make_autopct(percentages),
    pctdistance=0.85,
    startangle=90,
)
# Percentage texts: size 8, bold.
for pct_text in autotexts:
    pct_text.set_fontsize(8)
    pct_text.set_fontweight("bold")
# Zip-code label texts: size 8.
for label_text in texts:
    label_text.set_fontsize(8)
donut_ax.set_title('Distribution of All Zip Codes by Number of Parties')
# A white disc over the centre turns the pie into a donut.
center_circle = plt.Circle(xy=(0, 0), radius=0.70, facecolor='white')
donut_ax.add_artist(center_circle)
# 4. Scatter plot of party locations on a map
# Encode each borough as an integer category code to drive the point colour.
# Rows with a missing borough get code -1; they are plotted but have no
# legend entry.
borough_categories = data['Borough'].astype('category')
points = axs[1, 1].scatter(
    data['Longitude'],
    data['Latitude'],
    alpha=0.6,
    c=borough_categories.cat.codes,
)
axs[1, 1].set_title('Party Locations in New York City')
axs[1, 1].set_xlabel('Longitude')
axs[1, 1].set_ylabel('Latitude')
# Create a custom legend for the scatter plot.
# Entries are listed in category order, which is the order the integer codes
# were assigned in, and each swatch colour is read from the scatter's own
# colour mapping. Enumerating data['Borough'].unique() instead would pair
# names with the wrong codes, and colouring a proxy with c=[i] would
# normalise that single value on its own rather than against all codes.
boroughs = borough_categories.cat.categories
for code, borough in enumerate(boroughs):
    axs[1, 1].scatter([], [], color=points.to_rgba(code), label=borough)
axs[1, 1].legend(title='Borough', loc='center left', bbox_to_anchor=(1, 0.5))
# Adjust layout and display the plot
plt.tight_layout()
plt.show()
print("Analysis complete. Four plots have been generated and displayed.")
# Note: there is no output until the code is run; run it to generate the plots.