Python을 이용한 데이터 분석

AI 기본 과정

Python을 이용한 데이터 분석 - Matplotlib (4)

넝구리 2022. 5. 6. 23:30

ICT이노베이션스퀘어 AI기본과정(CNU) 교육을 듣고 정리한 내용입니다.

AI기본과정(CNU) 교육 자료를 참고하였습니다.

대중교통 데이터 프로젝트

지하철 유무임별 데이터

subwayfee.csv

0.03MB

1. 지하철 유무임별 이용현황 데이터 정제

데이터 출력

import csv

# Print every record of the fare-type CSV, header row included.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding of the CSV.
with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    # newline='' is what the csv module asks for; the with-block closes the
    # file as soon as the last row has been printed.
    subway = csv.reader(f)
    for row in subway:
        print(row)

인원 숫자 데이터를 정수로 변환

import csv

# Read the fare-type CSV and turn the text in columns 4-7 into integers.
# The file is opened in a with-block so it is closed even if int() raises.
with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    subway = csv.reader(f)
    next(subway)  # skip the header row

    for row in subway:
        for i in range(4, 8):
            row[i] = int(row[i])  # convert the count from str to int in place

2. 유임 승차 비율이 가장 높은 역은?

(유임승차 / 무임승차) 비율의 최대값 구하기

import csv

# Walk the fare-type CSV and print a row every time its row[4] / row[6]
# ratio beats the largest ratio seen so far.
maximum = 0  # largest ratio encountered up to the current row

with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    subway = csv.reader(f)
    next(subway)  # skip the header row

    for row in subway:
        for i in range(4, 8):
            row[i] = int(row[i])
        if row[6] == 0:
            continue  # the ratio is undefined when the denominator is zero
        rate = row[4] / row[6]
        if rate > maximum:
            maximum = rate
            # Each printed line is a new running maximum, so the last line
            # printed belongs to the overall maximum.
            print(row, round(rate, 2))

# 지금까지의 최대 rate보다 큰 행만 차례로 출력 (마지막에 출력된 행이 전체 최댓값)

유임 승차 비율이 가장 높은 역 찾기

import csv

# Find the station whose share row[4] / (row[4] + row[6]) is highest and
# print it as a percentage together with "<row[3]> <row[1]>".
maximum = 0
max_station = ""

# The with-block guarantees the CSV is closed once the scan is finished.
with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    subway = csv.reader(f)
    next(subway)  # skip the header row

    for row in subway:
        for i in range(4, 8):
            row[i] = int(row[i])
        total = row[4] + row[6]
        # Only rows with a non-zero row[6] and a combined count above
        # 100000 take part in the comparison.
        if row[6] != 0 and total > 100000:
            rate = row[4] / total
            if rate > maximum:
                maximum = rate
                max_station = row[3] + " " + row[1]

print(max_station, round(maximum*100, 2))

3. 유무임 승하차 인원이 가장 많은 역은?

유무임 승하차 인원이 가장 많은 역 찾기

import csv

# For each of the four count columns (4-7), find the row with the largest
# value and print the matching label, station and count.
label = ["유임승차","유임하차","무임승차","무임하차"]
maximum = [0] * 4          # best count per column, index 0 <-> column 4
max_station = [""] * 4     # "<row[3]> <row[1]>" of the row holding that count

# Reading inside a with-block closes the CSV once every row has been seen.
with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    subway = csv.reader(f)
    next(subway)  # skip the header row

    for row in subway:
        for i in range(4, 8):
            row[i] = int(row[i])
            if row[i] > maximum[i-4]:
                maximum[i-4] = row[i]
                max_station[i-4] = row[3] + " " + row[1]

for name, station, count in zip(label, max_station, maximum):
    print(name + " : " + station, count)

4. 모든 역의 유무임 승하차 비율은?

파이 차트

import csv
import matplotlib.pyplot as plt

# Draw one pie chart per CSV row from the four counts in columns 4-7.
label = ["유임승차","유임하차","무임승차","무임하차"]

# The wedge labels are Korean; matplotlib's default font has no Hangul
# glyphs, so a Korean-capable font must be selected before drawing.
plt.rc("font", family = "Malgun Gothic")

# The with-block closes the CSV after the last chart has been shown.
with open('C:/Users/82109/OneDrive/바탕 화면/subwayfee.csv', newline='') as f:
    subway = csv.reader(f)
    next(subway)  # skip the header row

    for row in subway:
        for i in range(4, 8):
            row[i] = int(row[i])
        plt.pie(row[4:8], labels = label, autopct = "%.1f%%")
        plt.axis("equal")  # equal aspect ratio keeps the pie circular
        plt.show()

지하철 시간대별 데이터

subwaytime.csv

0.15MB

1. 지하철 시간대별 이용 현황 데이터 정제

데이터 출력

import csv

# Print every record of the hourly CSV, header rows included.
# NOTE(review): no encoding is passed, so the platform default is used —
# confirm it matches the encoding of the CSV.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    # newline="" is what the csv module asks for; leaving the with-block
    # closes the file.
    sub_time = csv.reader(f)
    for row in sub_time:
        print(row)

문자열을 정수로 변환

import csv

# Print each data row of the hourly CSV with everything from column 4
# onwards converted from text to int.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    # The first two rows are skipped before any conversion is attempted.
    next(sub_time)
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]  # convert to integers
        print(row)
# Leaving the with-block has closed the file, even if int() raised.

2. 출근 시간대에 승하차가 가장 많은 역은?

아침 7시 승차 데이터 개수 및 인원 수 출력

import csv

# Collect column 10 (the 07h boarding count) of every data row, then print
# how many values were collected and the values themselves.
ride = []

# The with-block closes the CSV once all rows have been consumed.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        ride.append(row[10])

print(len(ride))     # number of collected boarding values
print(ride)

아침 7시 승차 데이터를 막대 그래프로 표현

import csv
import matplotlib.pyplot as plt

# Bar chart of column 10 (the 07h boarding count), one bar per data row in
# file order.
ride = []

# Read everything first so the file is closed before the plot window opens.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        ride.append(row[10])

plt.style.use("ggplot")
plt.bar(range(len(ride)), ride)
plt.show()

아침 7시 승차 데이터 오름차순 정렬하기

import csv
import matplotlib.pyplot as plt

# Bar chart of column 10 (the 07h boarding count) with the bars arranged in
# ascending order.
ride = []

# Read everything first so the file is closed before sorting and plotting.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        ride.append(row[10])

ride.sort()     # ascending order, in place

plt.style.use("ggplot")
plt.bar(range(len(ride)), ride)
plt.show()

아침 7 ~ 9시 승차 인원 더하기

import csv
import matplotlib.pyplot as plt

# Bar chart of the per-row sum of columns 10, 12 and 14 (boardings at 07h,
# 08h and 09h), sorted in ascending order.
ride = []

# Read everything first so the file is closed before sorting and plotting.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        # row[10:15:2] picks columns 10, 12 and 14 (every second column).
        ride.append(sum(row[10:15:2]))

ride.sort()

plt.style.use("ggplot")
plt.bar(range(len(ride)), ride)
plt.show()

아침 7 ~ 9시 승차 인원 최대 역 찾기

import csv

# Find the row with the largest sum of columns 10, 12 and 14 (boardings at
# 07h, 08h and 09h) and print "<row[3]>(<row[1]>)" with that sum.
maximum = 0
max_station = ""

# The with-block closes the CSV once the scan is finished.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        boarded = sum(row[10:15:2])  # computed once, used for test and update
        if boarded > maximum:
            maximum = boarded
            max_station = row[3] + "(" + row[1] + ")"

print(max_station, maximum)

아침 7 ~ 9시 하차 인원 최대 역 찾기

import csv

# Find the row with the largest sum of columns 11, 13 and 15 (alightings at
# 07h, 08h and 09h) and print "<row[3]>(<row[1]>)" with that sum.
maximum = 0
max_station = ""

# The with-block closes the CSV once the scan is finished.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        # row[11:16:2] picks columns 11, 13 and 15 (every second column).
        alighted = sum(row[11:16:2])
        if alighted > maximum:
            maximum = alighted
            max_station = row[3] + "(" + row[1] + ")"

print(max_station, maximum)

3. 밤 11시(23시)에 사람들이 가장 많이 타는 역은?

밤 11시에 승차 인원이 가장 많은 역 찾기

import csv

# Ask for an hour and report the row with the most boardings in that hour,
# printed as "<row[3]>(<row[1]>)" followed by the count.
maximum = 0
max_station = ""

time = int(input("궁금한 시간대를 입력하세요:"))

# Boarding counts sit in every second column starting at column 4, which is
# hour 4; 24 hourly slots therefore cover hours 4..27.
# NOTE(review): 0-3 are taken to mean the after-midnight slots 24-27 —
# confirm against the CSV header.
hour = time + 24 if 0 <= time < 4 else time
if not 4 <= hour <= 27:
    # Without this check an out-of-range hour would index a non-count
    # column or run past the end of the row.
    raise ValueError("hour must be between 0 and 27, got " + str(time))
column = 4 + (hour - 4) * 2

# The with-block closes the CSV once the scan is finished.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        a = row[column]     # boarding count for the requested hour
        if a > maximum:
            maximum = a
            max_station = row[3] + "(" + row[1] + ")"

print(max_station, maximum)

4. 시간대별로 승차 인원이 가장 많은 역은?

시간대별 최대 승차 역 이름 및 승차 인원 출력

import csv

# For each of the 24 hourly boarding columns (4, 6, ..., 50), record the
# largest count and the station name (row[3]) of the row that holds it.
maximum = [0] * 24
max_station = [""] * 24

# The with-block closes the CSV once the scan is finished.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        for i in range(24):
            a = row[i*2+4]  # boarding count of the i-th hourly slot
            if a > maximum[i]:
                maximum[i] = a
                max_station[i] = row[3]

print(max_station)
print(maximum)

시간대별 최대 승차 역 이름 및 승차 인원을 막대 그래프로 표현

import csv
import matplotlib.pyplot as plt

# Bar chart of the largest boarding count in each of the 24 hourly slots;
# every bar is labelled "<row[3]>(<hour>)" for the row that set the maximum.
maximum = [0] * 24
max_station = [""] * 24

# Read everything first so the file is closed before the plot window opens.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        for i in range(24):
            a = row[i*2+4]  # boarding count of the i-th hourly slot
            if a > maximum[i]:
                maximum[i] = a
                # Slot i is shown as hour i + 4 in the tick label.
                max_station[i] = row[3] + "(" + str(i+4) + ")"

plt.rc("font", family = "Malgun Gothic")
plt.bar(range(24), maximum)
# First argument: tick positions; second: the text shown at each position.
plt.xticks(range(24), max_station, rotation = 90)     # rotation turns the tick labels
plt.show()

# plt.xticks(눈금을 적용할 x축의 실제 위치, 해당 위치에 나타낼 값) : 축 눈금 설정

5. 모든 지하철역의 시간대별 승하차 인원을 모두 더하기

모든 지하철역의 시간대별 승하차 인원 추이

import csv
import matplotlib.pyplot as plt

# Line plot of boardings and alightings summed over every data row, one
# point per hourly slot (24 slots).
sub_in = [0] * 24      # totals of columns 4, 6, ..., 50
sub_out = [0] * 24     # totals of columns 5, 7, ..., 51

# Read everything first so the file is closed before the plot window opens.
with open("C:/Users/82109/OneDrive/바탕 화면/subwaytime.csv", newline="") as f:
    sub_time = csv.reader(f)
    next(sub_time)  # skip the first two rows
    next(sub_time)

    for row in sub_time:
        row[4:] = [int(value) for value in row[4:]]
        for i in range(24):
            sub_in[i] += row[4+i*2]
            sub_out[i] += row[5+i*2]

# Style and font are set before the figure is created so that the
# figure-level settings of the style apply to it as well.
plt.style.use("ggplot")
plt.rc("font", family = "Malgun Gothic")
plt.figure(dpi = 100)
plt.title("지하철역의 시간대별 승하차 인원 추이")
plt.plot(sub_in, label = "승차")
plt.plot(sub_out, label = "하차")
plt.legend()
plt.xticks(range(24), range(4,28))     # positions 0-23 are labelled 4-27
plt.show()

저작자표시 비영리 변경금지 (새창열림)