Crawling

Packages: requests, bs4 (BeautifulSoup)

import requests
from datetime import datetime
from bs4 import BeautifulSoup

# Crawl the Naver movie ranking page for today's date and print, for every
# movie row, its ranking number, title and rating.

# The page takes the date as a YYYYMMDD `date` query parameter.
today = datetime.today()
day = today.strftime("%Y%m%d")
print(day)

# Send a browser-like User-Agent header with the request.
headers = {'User-Agent' : 'Mozilla/5.0 (Windows NT 10.0; Win64; x64)AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36'}
# `day` is already a string, so it is appended to the URL as-is.
url = 'https://movie.naver.com/movie/sdb/rank/rmovie.nhn?sel=pnt&date=' + day
# A timeout keeps the script from hanging forever if the server never answers.
data = requests.get(url, headers=headers, timeout=10)

soup = BeautifulSoup(data.text, 'html.parser')

# Selectors below were copied from the browser inspector, e.g.
# #old_content > table > tbody > tr:nth-child(2)
# Dropping the nth-child part selects every row of the table.
trs = soup.select('#old_content > table > tbody > tr')
for tr in trs:
    # #old_content > table > tbody > tr:nth-child(2) > td.title > div > a
    a_tag = tr.select_one('td.title > div > a')
    if a_tag is None:
        # Rows without a title link carry no movie; skip them.
        continue

    # #old_content > table > tbody > tr:nth-child(2) > td:nth-child(1) > img
    img_tag = tr.select_one('td:nth-child(1) > img')
    # #old_content > table > tbody > tr:nth-child(2) > td.point
    point_tag = tr.select_one('td.point')
    if img_tag is None or point_tag is None:
        # select_one() returns None when nothing matches; skip incomplete rows
        # instead of failing on None['alt'] / None.text.
        continue

    number = img_tag['alt']
    title = a_tag.text
    rank = point_tag.text

    print(number, title, rank)

DB

program: MongoDB, Robo3T

DB (pymongo basics: insert, find, update, delete)

from pymongo import MongoClient
# Connect to the MongoDB server at localhost:27017 and pick the `dbsparta` database.
client = MongoClient('localhost', 27017)
db = client.dbsparta

# Operations walked through below:
#   1. insert
#   2. find
#   3. update
#   4. delete

# 1. insert -- a document is passed as a dictionary (example left disabled):
#   doc = {'name': 'jane', 'age': 21}
#   db.users.insert_one(doc)

# Projection that drops the `_id` field from query results; without it every
# returned document also carries its `_id`.
without_id = {'_id': False}
# Filter matching the user named 'bobby', shared by find/update/delete below.
bobby = {'name': 'bobby'}

# 2. find -- collect every user whose age is 21 into a list.
# list(db.users.find({}, {'_id': False})) would return all users instead.
cursor = db.users.find({'age': 21}, without_id)
same_ages = list(cursor)
for entry in same_ages:
    print(entry)

# find_one -- fetch only a single user named 'bobby'.
user = db.users.find_one(bobby, without_id)
print(user)

# 3. update -- set the age of the user named 'bobby' to 19.
db.users.update_one(bobby, {'$set': {'age': 19}})

# 4. delete -- remove the user named 'bobby'.
db.users.delete_one(bobby)

quiz

from pymongo import MongoClient
# Quiz: read, compare and update the rating stored for the movie '매트릭스'
# (Matrix) in the `movies` collection of the `dbsparta` database.
client = MongoClient('localhost', 27017)
db = client.dbsparta

# Filter and projection shared by every query below.
matrix_filter = {'title': '매트릭스'}
without_id = {'_id': False}

# 1. Get rate of 'Matrix'
movie = db.movies.find_one(matrix_filter, without_id)
if movie is None:
    # find_one() returns None when no document matches; report it instead of
    # failing with a TypeError on movie['rank'].
    print("movie '매트릭스' not found in db.movies")
else:
    print(movie['rank'])

    # 2. Get titles which are same rate with 'Matrix'
    movies = list(db.movies.find({'rank': movie['rank']}, without_id))
    # A separate loop name keeps the Matrix document from step 1 intact.
    for same_rated in movies:
        print(same_rated['title'])

    # 3. Set Matrix's rate to 0
    # The rating is written as the string "0.00" because every field is stored
    # in string format.
    db.movies.update_one(matrix_filter, {'$set': {'rank': "0.00"}})

    # Re-read the document to show the updated rating.
    movie = db.movies.find_one(matrix_filter, without_id)
    print(movie)