## Import the required modules
import requests
import os
import pandas as pd
import json
import numpy as np
import re
# Load the tweet archive CSV into a DataFrame.
twitter_archive_enhanced = pd.read_csv("twitter-archive-enhanced.csv")

# tweet_json.txt stores one JSON document per line, so it is decoded line by
# line with the json module; the rows are collected in a list first because
# that is the easiest shape to hand to pd.DataFrame.
tweet_list = []
with open("tweet_json.txt", encoding="utf-8") as file:
    for line in file:
        # A blank (e.g. trailing) line is not a JSON document; skip it
        # instead of letting json.loads raise.
        if not line.strip():
            continue
        # Decode the line once and reuse the result for every field.
        tweet = json.loads(line)
        # "id_str" is read rather than "id": the data type of "id" would
        # need extra handling later on.
        tweet_id = tweet["id_str"]
        retweet_count = tweet["retweet_count"]    # number of retweets
        favorite_count = tweet["favorite_count"]  # number of favorites
        tweet_list.append({
            "tweet_id": tweet_id,
            "retweet_count": retweet_count,
            "favorite_count": favorite_count,
        })

# Build the DataFrame with an explicit column order.
extra_data = pd.DataFrame(
    tweet_list,
    columns=["tweet_id", "retweet_count", "favorite_count"],
)
# Number of records collected from tweet_json.txt (notebook output follows).
len(tweet_list)
2352
# Print a summary of extra_data: entry count, per-column non-null counts and
# dtypes (notebook output follows).
extra_data.info()
RangeIndex: 2352 entries, 0 to 2351
Data columns (total 3 columns):
tweet_id 2352 non-null object
retweet_count 2352 non-null int64
favorite_count 2352 non-null int64
dtypes: int64(2), object(1)
memory usage: 55.2+ KB
# The downloaded file is stored in folder_name, here the current working
# directory. The directory is created if it does not exist yet; exist_ok
# makes the call a no-op when it is already there.
folder_name = os.getcwd()
os.makedirs(folder_name, exist_ok=True)

# URL of the tweet image-prediction data (a TSV file).
url = "https://raw.githubusercontent.com/udacity/new-dand-advanced-china/master/%E6%95%B0%E6%8D%AE%E6%B8%85%E6%B4%97/WeRateDogs%E9%A1%B9%E7%9B%AE/image-predictions.tsv"

# Bound the wait with a timeout and stop on an HTTP error status, so that an
# error page is never written to disk and then parsed as if it were the TSV.
response = requests.get(url, timeout=30)
response.raise_for_status()

# The local file keeps the last path component of the URL as its name.
file_path = os.path.join(folder_name, url.split("/")[-1])
with open(file_path, mode="wb") as file:
    file.write(response.content)

# Read the tab-separated file into a DataFrame.
image_predictions = pd.read_csv(file_path, sep="\t")
# Show the content of every table cell in full (up to 1000 characters)
# instead of truncating it. The full option key is used because abbreviated
# keys are matched by pattern and are not guaranteed to stay unambiguous.
pd.set_option("display.max_colwidth", 1000)
# Notebook display of the tweet archive DataFrame; the table below is its
# rendered output.
twitter_archive_enhanced
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | NaN | NaN | 2017-08-01 16:23:56 +0000 | Twitter for iPhone | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892420643555336193/photo/1 | 13 | 10 | Phineas | None | None | None | None |
1 | 892177421306343426 | NaN | NaN | 2017-08-01 00:17:27 +0000 | Twitter for iPhone | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | NaN | NaN | NaN | https://twitter.com/dog_rates/status/892177421306343426/photo/1 | 13 | 10 | Tilly | None | None | None | None |
2 | 891815181378084864 | NaN | NaN | 2017-07-31 00:18:03 +0000 | Twitter for iPhone | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891815181378084864/photo/1 | 12 | 10 | Archie | None | None | None | None |
3 | 891689557279858688 | NaN | NaN | 2017-07-30 15:58:51 +0000 | Twitter for iPhone | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891689557279858688/photo/1 | 13 | 10 | Darla | None | None | None | None |
4 | 891327558926688256 | NaN | NaN | 2017-07-29 16:00:24 +0000 | Twitter for iPhone | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891327558926688256/photo/1,https://twitter.com/dog_rates/status/891327558926688256/photo/1 | 12 | 10 | Franklin | None | None | None | None |
5 | 891087950875897856 | NaN | NaN | 2017-07-29 00:08:17 +0000 | Twitter for iPhone | Here we have a majestic great white breaching off South Africa's coast. Absolutely h*ckin breathtaking. 13/10 (IG: tucker_marlo) #BarkWeek https://t.co/kQ04fDDRmh | NaN | NaN | NaN | https://twitter.com/dog_rates/status/891087950875897856/photo/1 | 13 | 10 | None | None | None | None | None |
6 | 890971913173991426 | NaN | NaN | 2017-07-28 16:27:12 +0000 | Twitter for iPhone | Meet Jax. He enjoys ice cream so much he gets nervous around it. 13/10 help Jax enjoy more things by clicking below\n\nhttps://t.co/Zr4hWfAs1H https://t.co/tVJBRMnhxl | NaN | NaN | NaN | https://gofundme.com/ydvmve-surgery-for-jax,https://twitter.com/dog_rates/status/890971913173991426/photo/1 | 13 | 10 | Jax | None | None | None | None |
7 | 890729181411237888 | NaN | NaN | 2017-07-28 00:22:40 +0000 | Twitter for iPhone | When you watch your owner call another dog a good boy but then they turn back to you and say you're a great boy. 13/10 https://t.co/v0nONBcwxq | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890729181411237888/photo/1,https://twitter.com/dog_rates/status/890729181411237888/photo/1 | 13 | 10 | None | None | None | None | None |
8 | 890609185150312448 | NaN | NaN | 2017-07-27 16:25:51 +0000 | Twitter for iPhone | This is Zoey. She doesn't want to be one of the scary sharks. Just wants to be a snuggly pettable boatpet. 13/10 #BarkWeek https://t.co/9TwLuAGH0b | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890609185150312448/photo/1 | 13 | 10 | Zoey | None | None | None | None |
9 | 890240255349198849 | NaN | NaN | 2017-07-26 15:59:51 +0000 | Twitter for iPhone | This is Cassie. She is a college pup. Studying international doggo communication and stick theory. 14/10 so elegant much sophisticate https://t.co/t1bfwz5S2A | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890240255349198849/photo/1 | 14 | 10 | Cassie | doggo | None | None | None |
10 | 890006608113172480 | NaN | NaN | 2017-07-26 00:31:25 +0000 | Twitter for iPhone | This is Koda. He is a South Australian deckshark. Deceptively deadly. Frighteningly majestic. 13/10 would risk a petting #BarkWeek https://t.co/dVPW0B0Mme | NaN | NaN | NaN | https://twitter.com/dog_rates/status/890006608113172480/photo/1,https://twitter.com/dog_rates/status/890006608113172480/photo/1 | 13 | 10 | Koda | None | None | None | None |
11 | 889880896479866881 | NaN | NaN | 2017-07-25 16:11:53 +0000 | Twitter for iPhone | This is Bruno. He is a service shark. Only gets out of the water to assist you. 13/10 terrifyingly good boy https://t.co/u1XPQMl29g | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889880896479866881/photo/1 | 13 | 10 | Bruno | None | None | None | None |
12 | 889665388333682689 | NaN | NaN | 2017-07-25 01:55:32 +0000 | Twitter for iPhone | Here's a puppo that seems to be on the fence about something haha no but seriously someone help her. 13/10 https://t.co/BxvuXk0UCm | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889665388333682689/photo/1 | 13 | 10 | None | None | None | None | puppo |
13 | 889638837579907072 | NaN | NaN | 2017-07-25 00:10:02 +0000 | Twitter for iPhone | This is Ted. He does his best. Sometimes that's not enough. But it's ok. 12/10 would assist https://t.co/f8dEDcrKSR | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889638837579907072/photo/1,https://twitter.com/dog_rates/status/889638837579907072/photo/1 | 12 | 10 | Ted | None | None | None | None |
14 | 889531135344209921 | NaN | NaN | 2017-07-24 17:02:04 +0000 | Twitter for iPhone | This is Stuart. He's sporting his favorite fanny pack. Secretly filled with bones only. 13/10 puppared puppo #BarkWeek https://t.co/y70o6h3isq | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889531135344209921/photo/1 | 13 | 10 | Stuart | None | None | None | puppo |
15 | 889278841981685760 | NaN | NaN | 2017-07-24 00:19:32 +0000 | Twitter for iPhone | This is Oliver. You're witnessing one of his many brutal attacks. Seems to be playing with his victim. 13/10 fr*ckin frightening #BarkWeek https://t.co/WpHvrQedPb | NaN | NaN | NaN | https://twitter.com/dog_rates/status/889278841981685760/video/1 | 13 | 10 | Oliver | None | None | None | None |
16 | 888917238123831296 | NaN | NaN | 2017-07-23 00:22:39 +0000 | Twitter for iPhone | This is Jim. He found a fren. Taught him how to sit like the good boys. 12/10 for both https://t.co/chxruIOUJN | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888917238123831296/photo/1 | 12 | 10 | Jim | None | None | None | None |
17 | 888804989199671297 | NaN | NaN | 2017-07-22 16:56:37 +0000 | Twitter for iPhone | This is Zeke. He has a new stick. Very proud of it. Would like you to throw it for him without taking it. 13/10 would do my best https://t.co/HTQ77yNQ5K | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888804989199671297/photo/1,https://twitter.com/dog_rates/status/888804989199671297/photo/1 | 13 | 10 | Zeke | None | None | None | None |
18 | 888554962724278272 | NaN | NaN | 2017-07-22 00:23:06 +0000 | Twitter for iPhone | This is Ralphus. He's powering up. Attempting maximum borkdrive. 13/10 inspirational af https://t.co/YnYAFCTTiK | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888554962724278272/photo/1,https://twitter.com/dog_rates/status/888554962724278272/photo/1,https://twitter.com/dog_rates/status/888554962724278272/photo/1,https://twitter.com/dog_rates/status/888554962724278272/photo/1 | 13 | 10 | Ralphus | None | None | None | None |
19 | 888202515573088257 | NaN | NaN | 2017-07-21 01:02:36 +0000 | Twitter for iPhone | RT @dog_rates: This is Canela. She attempted some fancy porch pics. They were unsuccessful. 13/10 someone help her https://t.co/cLyzpcUcMX | 8.874740e+17 | 4.196984e+09 | 2017-07-19 00:47:34 +0000 | https://twitter.com/dog_rates/status/887473957103951883/photo/1,https://twitter.com/dog_rates/status/887473957103951883/photo/1,https://twitter.com/dog_rates/status/887473957103951883/photo/1,https://twitter.com/dog_rates/status/887473957103951883/photo/1 | 13 | 10 | Canela | None | None | None | None |
20 | 888078434458587136 | NaN | NaN | 2017-07-20 16:49:33 +0000 | Twitter for iPhone | This is Gerald. He was just told he didn't get the job he interviewed for. A h*ckin injustice. 12/10 didn't want the job anyway https://t.co/DK7iDPfuRX | NaN | NaN | NaN | https://twitter.com/dog_rates/status/888078434458587136/photo/1,https://twitter.com/dog_rates/status/888078434458587136/photo/1 | 12 | 10 | Gerald | None | None | None | None |
21 | 887705289381826560 | NaN | NaN | 2017-07-19 16:06:48 +0000 | Twitter for iPhone | This is Jeffrey. He has a monopoly on the pool noodles. Currently running a 'boop for two' midweek sale. 13/10 h*ckin strategic https://t.co/PhrUk20Q64 | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887705289381826560/photo/1 | 13 | 10 | Jeffrey | None | None | None | None |
22 | 887517139158093824 | NaN | NaN | 2017-07-19 03:39:09 +0000 | Twitter for iPhone | I've yet to rate a Venezuelan Hover Wiener. This is such an honor. 14/10 paw-inspiring af (IG: roxy.thedoxy) https://t.co/20VrLAA8ba | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887517139158093824/video/1 | 14 | 10 | such | None | None | None | None |
23 | 887473957103951883 | NaN | NaN | 2017-07-19 00:47:34 +0000 | Twitter for iPhone | This is Canela. She attempted some fancy porch pics. They were unsuccessful. 13/10 someone help her https://t.co/cLyzpcUcMX | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887473957103951883/photo/1,https://twitter.com/dog_rates/status/887473957103951883/photo/1 | 13 | 10 | Canela | None | None | None | None |
24 | 887343217045368832 | NaN | NaN | 2017-07-18 16:08:03 +0000 | Twitter for iPhone | You may not have known you needed to see this today. 13/10 please enjoy (IG: emmylouroo) https://t.co/WZqNqygEyV | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887343217045368832/video/1 | 13 | 10 | None | None | None | None | None |
25 | 887101392804085760 | NaN | NaN | 2017-07-18 00:07:08 +0000 | Twitter for iPhone | This... is a Jubilant Antarctic House Bear. We only rate dogs. Please only send dogs. Thank you... 12/10 would suffocate in floof https://t.co/4Ad1jzJSdp | NaN | NaN | NaN | https://twitter.com/dog_rates/status/887101392804085760/photo/1 | 12 | 10 | None | None | None | None | None |
26 | 886983233522544640 | NaN | NaN | 2017-07-17 16:17:36 +0000 | Twitter for iPhone | This is Maya. She's very shy. Rarely leaves her cup. 13/10 would find her an environment to thrive in https://t.co/I6oNy0CgiT | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886983233522544640/photo/1,https://twitter.com/dog_rates/status/886983233522544640/photo/1 | 13 | 10 | Maya | None | None | None | None |
27 | 886736880519319552 | NaN | NaN | 2017-07-16 23:58:41 +0000 | Twitter for iPhone | This is Mingus. He's a wonderful father to his smol pup. Confirmed 13/10, but he needs your help\n\nhttps://t.co/bVi0Yr4Cff https://t.co/ISvKOSkd5b | NaN | NaN | NaN | https://www.gofundme.com/mingusneedsus,https://twitter.com/dog_rates/status/886736880519319552/photo/1,https://twitter.com/dog_rates/status/886736880519319552/photo/1 | 13 | 10 | Mingus | None | None | None | None |
28 | 886680336477933568 | NaN | NaN | 2017-07-16 20:14:00 +0000 | Twitter for iPhone | This is Derek. He's late for a dog meeting. 13/10 pet...al to the metal https://t.co/BCoWue0abA | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886680336477933568/photo/1 | 13 | 10 | Derek | None | None | None | None |
29 | 886366144734445568 | NaN | NaN | 2017-07-15 23:25:31 +0000 | Twitter for iPhone | This is Roscoe. Another pupper fallen victim to spontaneous tongue ejections. Get the BlepiPen immediate. 12/10 deep breaths Roscoe https://t.co/RGE08MIJox | NaN | NaN | NaN | https://twitter.com/dog_rates/status/886366144734445568/photo/1,https://twitter.com/dog_rates/status/886366144734445568/photo/1 | 12 | 10 | Roscoe | None | None | pupper | None |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2326 | 666411507551481857 | NaN | NaN | 2015-11-17 00:24:19 +0000 | Twitter for iPhone | This is quite the dog. Gets really excited when not in water. Not very soft tho. Bad at fetch. Can't do tricks. 2/10 https://t.co/aMCTNWO94t | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666411507551481857/photo/1 | 2 | 10 | quite | None | None | None | None |
2327 | 666407126856765440 | NaN | NaN | 2015-11-17 00:06:54 +0000 | Twitter for iPhone | This is a southern Vesuvius bumblegruff. Can drive a truck (wow). Made friends with 5 other nifty dogs (neat). 7/10 https://t.co/LopTBkKa8h | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666407126856765440/photo/1 | 7 | 10 | a | None | None | None | None |
2328 | 666396247373291520 | NaN | NaN | 2015-11-16 23:23:41 +0000 | Twitter for iPhone | Oh goodness. A super rare northeast Qdoba kangaroo mix. Massive feet. No pouch (disappointing). Seems alert. 9/10 https://t.co/Dc7b0E8qFE | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666396247373291520/photo/1 | 9 | 10 | None | None | None | None | None |
2329 | 666373753744588802 | NaN | NaN | 2015-11-16 21:54:18 +0000 | Twitter for iPhone | Those are sunglasses and a jean jacket. 11/10 dog cool af https://t.co/uHXrPkUEyl | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666373753744588802/photo/1 | 11 | 10 | None | None | None | None | None |
2330 | 666362758909284353 | NaN | NaN | 2015-11-16 21:10:36 +0000 | Twitter for iPhone | Unique dog here. Very small. Lives in container of Frosted Flakes (?). Short legs. Must be rare 6/10 would still pet https://t.co/XMD9CwjEnM | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666362758909284353/photo/1 | 6 | 10 | None | None | None | None | None |
2331 | 666353288456101888 | NaN | NaN | 2015-11-16 20:32:58 +0000 | Twitter for iPhone | Here we have a mixed Asiago from the Galápagos Islands. Only one ear working. Big fan of marijuana carpet. 8/10 https://t.co/tltQ5w9aUO | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666353288456101888/photo/1 | 8 | 10 | None | None | None | None | None |
2332 | 666345417576210432 | NaN | NaN | 2015-11-16 20:01:42 +0000 | Twitter for iPhone | Look at this jokester thinking seat belt laws don't apply to him. Great tongue tho 10/10 https://t.co/VFKG1vxGjB | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666345417576210432/photo/1 | 10 | 10 | None | None | None | None | None |
2333 | 666337882303524864 | NaN | NaN | 2015-11-16 19:31:45 +0000 | Twitter for iPhone | This is an extremely rare horned Parthenon. Not amused. Wears shoes. Overall very nice. 9/10 would pet aggressively https://t.co/QpRjllzWAL | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666337882303524864/photo/1 | 9 | 10 | an | None | None | None | None |
2334 | 666293911632134144 | NaN | NaN | 2015-11-16 16:37:02 +0000 | Twitter for iPhone | This is a funny dog. Weird toes. Won't come down. Loves branch. Refuses to eat his food. Hard to cuddle with. 3/10 https://t.co/IIXis0zta0 | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666293911632134144/photo/1 | 3 | 10 | a | None | None | None | None |
2335 | 666287406224695296 | NaN | NaN | 2015-11-16 16:11:11 +0000 | Twitter for iPhone | This is an Albanian 3 1/2 legged Episcopalian. Loves well-polished hardwood flooring. Penis on the collar. 9/10 https://t.co/d9NcXFKwLv | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666287406224695296/photo/1 | 1 | 2 | an | None | None | None | None |
2336 | 666273097616637952 | NaN | NaN | 2015-11-16 15:14:19 +0000 | Twitter for iPhone | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666273097616637952/photo/1 | 11 | 10 | None | None | None | None | None |
2337 | 666268910803644416 | NaN | NaN | 2015-11-16 14:57:41 +0000 | Twitter for iPhone | Very concerned about fellow dog trapped in computer. 10/10 https://t.co/0yxApIikpk | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666268910803644416/photo/1 | 10 | 10 | None | None | None | None | None |
2338 | 666104133288665088 | NaN | NaN | 2015-11-16 04:02:55 +0000 | Twitter for iPhone | Not familiar with this breed. No tail (weird). Only 2 legs. Doesn't bark. Surprisingly quick. Shits eggs. 1/10 https://t.co/Asgdc6kuLX | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666104133288665088/photo/1 | 1 | 10 | None | None | None | None | None |
2339 | 666102155909144576 | NaN | NaN | 2015-11-16 03:55:04 +0000 | Twitter for iPhone | Oh my. Here you are seeing an Adobe Setter giving birth to twins!!! The world is an amazing place. 11/10 https://t.co/11LvqN4WLq | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666102155909144576/photo/1 | 11 | 10 | None | None | None | None | None |
2340 | 666099513787052032 | NaN | NaN | 2015-11-16 03:44:34 +0000 | Twitter for iPhone | Can stand on stump for what seems like a while. Built that birdhouse? Impressive. Made friends with a squirrel. 8/10 https://t.co/Ri4nMTLq5C | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666099513787052032/photo/1 | 8 | 10 | None | None | None | None | None |
2341 | 666094000022159362 | NaN | NaN | 2015-11-16 03:22:39 +0000 | Twitter for iPhone | This appears to be a Mongolian Presbyterian mix. Very tired. Tongue slip confirmed. 9/10 would lie down with https://t.co/mnioXo3IfP | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666094000022159362/photo/1 | 9 | 10 | None | None | None | None | None |
2342 | 666082916733198337 | NaN | NaN | 2015-11-16 02:38:37 +0000 | Twitter for iPhone | Here we have a well-established sunblockerspaniel. Lost his other flip-flop. 6/10 not very waterproof https://t.co/3RU6x0vHB7 | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666082916733198337/photo/1 | 6 | 10 | None | None | None | None | None |
2343 | 666073100786774016 | NaN | NaN | 2015-11-16 01:59:36 +0000 | Twitter for iPhone | Let's hope this flight isn't Malaysian (lol). What a dog! Almost completely camouflaged. 10/10 I trust this pilot https://t.co/Yk6GHE9tOY | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666073100786774016/photo/1 | 10 | 10 | None | None | None | None | None |
2344 | 666071193221509120 | NaN | NaN | 2015-11-16 01:52:02 +0000 | Twitter for iPhone | Here we have a northern speckled Rhododendron. Much sass. Gives 0 fucks. Good tongue. 9/10 would caress sensually https://t.co/ZoL8kq2XFx | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666071193221509120/photo/1 | 9 | 10 | None | None | None | None | None |
2345 | 666063827256086533 | NaN | NaN | 2015-11-16 01:22:45 +0000 | Twitter for iPhone | This is the happiest dog you will ever see. Very committed owner. Nice couch. 10/10 https://t.co/RhUEAloehK | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666063827256086533/photo/1 | 10 | 10 | the | None | None | None | None |
2346 | 666058600524156928 | NaN | NaN | 2015-11-16 01:01:59 +0000 | Twitter for iPhone | Here is the Rand Paul of retrievers folks! He's probably good at poker. Can drink beer (lol rad). 8/10 good dog https://t.co/pYAJkAe76p | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666058600524156928/photo/1 | 8 | 10 | the | None | None | None | None |
2347 | 666057090499244032 | NaN | NaN | 2015-11-16 00:55:59 +0000 | Twitter for iPhone | My oh my. This is a rare blond Canadian terrier on wheels. Only $8.98. Rather docile. 9/10 very rare https://t.co/yWBqbrzy8O | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666057090499244032/photo/1 | 9 | 10 | a | None | None | None | None |
2348 | 666055525042405380 | NaN | NaN | 2015-11-16 00:49:46 +0000 | Twitter for iPhone | Here is a Siberian heavily armored polar bear mix. Strong owner. 10/10 I would do unspeakable things to pet this dog https://t.co/rdivxLiqEt | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666055525042405380/photo/1 | 10 | 10 | a | None | None | None | None |
2349 | 666051853826850816 | NaN | NaN | 2015-11-16 00:35:11 +0000 | Twitter for iPhone | This is an odd dog. Hard on the outside but loving on the inside. Petting still fun. Doesn't play catch well. 2/10 https://t.co/v5A4vzSDdc | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666051853826850816/photo/1 | 2 | 10 | an | None | None | None | None |
2350 | 666050758794694657 | NaN | NaN | 2015-11-16 00:30:50 +0000 | Twitter for iPhone | This is a truly beautiful English Wilson Staff retriever. Has a nice phone. Privileged. 10/10 would trade lives with https://t.co/fvIbQfHjIe | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666050758794694657/photo/1 | 10 | 10 | a | None | None | None | None |
2351 | 666049248165822465 | NaN | NaN | 2015-11-16 00:24:50 +0000 | Twitter for iPhone | Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666049248165822465/photo/1 | 5 | 10 | None | None | None | None | None |
2352 | 666044226329800704 | NaN | NaN | 2015-11-16 00:04:52 +0000 | Twitter for iPhone | This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666044226329800704/photo/1 | 6 | 10 | a | None | None | None | None |
2353 | 666033412701032449 | NaN | NaN | 2015-11-15 23:21:54 +0000 | Twitter for iPhone | Here is a very happy pup. Big fan of well-maintained decks. Just look at that tongue. 9/10 would cuddle af https://t.co/y671yMhoiR | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666033412701032449/photo/1 | 9 | 10 | a | None | None | None | None |
2354 | 666029285002620928 | NaN | NaN | 2015-11-15 23:05:30 +0000 | Twitter for iPhone | This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666029285002620928/photo/1 | 7 | 10 | a | None | None | None | None |
2355 | 666020888022790149 | NaN | NaN | 2015-11-15 22:32:08 +0000 | Twitter for iPhone | Here we have a Japanese Irish Setter. Lost eye in Vietnam (?). Big fan of relaxing on stair. 8/10 would pet https://t.co/BLDqew2Ijj | NaN | NaN | NaN | https://twitter.com/dog_rates/status/666020888022790149/photo/1 | 8 | 10 | None | None | None | None | None |
2356 rows × 17 columns
# Notebook display of the retweet/favorite counts DataFrame; the table below
# is its rendered output.
extra_data
tweet_id | retweet_count | favorite_count | |
---|---|---|---|
0 | 892420643555336193 | 8842 | 39492 |
1 | 892177421306343426 | 6480 | 33786 |
2 | 891815181378084864 | 4301 | 25445 |
3 | 891689557279858688 | 8925 | 42863 |
4 | 891327558926688256 | 9721 | 41016 |
5 | 891087950875897856 | 3240 | 20548 |
6 | 890971913173991426 | 2142 | 12053 |
7 | 890729181411237888 | 19548 | 66596 |
8 | 890609185150312448 | 4403 | 28187 |
9 | 890240255349198849 | 7684 | 32467 |
10 | 890006608113172480 | 7584 | 31127 |
11 | 889880896479866881 | 5116 | 28208 |
12 | 889665388333682689 | 8502 | 38745 |
13 | 889638837579907072 | 4705 | 27633 |
14 | 889531135344209921 | 2309 | 15329 |
15 | 889278841981685760 | 5635 | 25712 |
16 | 888917238123831296 | 4681 | 29555 |
17 | 888804989199671297 | 4535 | 26021 |
18 | 888554962724278272 | 3722 | 20267 |
19 | 888078434458587136 | 3637 | 22144 |
20 | 887705289381826560 | 5584 | 30690 |
21 | 887517139158093824 | 12053 | 46940 |
22 | 887473957103951883 | 18813 | 70007 |
23 | 887343217045368832 | 10713 | 34223 |
24 | 887101392804085760 | 6147 | 31045 |
25 | 886983233522544640 | 8045 | 35786 |
26 | 886736880519319552 | 3420 | 12286 |
27 | 886680336477933568 | 4597 | 22802 |
28 | 886366144734445568 | 3297 | 21488 |
29 | 886267009285017600 | 4 | 117 |
... | ... | ... | ... |
2322 | 666411507551481857 | 337 | 457 |
2323 | 666407126856765440 | 43 | 113 |
2324 | 666396247373291520 | 91 | 171 |
2325 | 666373753744588802 | 99 | 194 |
2326 | 666362758909284353 | 590 | 801 |
2327 | 666353288456101888 | 76 | 228 |
2328 | 666345417576210432 | 146 | 308 |
2329 | 666337882303524864 | 96 | 203 |
2330 | 666293911632134144 | 365 | 519 |
2331 | 666287406224695296 | 71 | 152 |
2332 | 666273097616637952 | 81 | 183 |
2333 | 666268910803644416 | 37 | 108 |
2334 | 666104133288665088 | 6835 | 14703 |
2335 | 666102155909144576 | 15 | 81 |
2336 | 666099513787052032 | 73 | 160 |
2337 | 666094000022159362 | 78 | 168 |
2338 | 666082916733198337 | 47 | 121 |
2339 | 666073100786774016 | 173 | 334 |
2340 | 666071193221509120 | 67 | 154 |
2341 | 666063827256086533 | 230 | 494 |
2342 | 666058600524156928 | 61 | 117 |
2343 | 666057090499244032 | 146 | 304 |
2344 | 666055525042405380 | 261 | 449 |
2345 | 666051853826850816 | 877 | 1250 |
2346 | 666050758794694657 | 60 | 136 |
2347 | 666049248165822465 | 41 | 111 |
2348 | 666044226329800704 | 147 | 309 |
2349 | 666033412701032449 | 47 | 128 |
2350 | 666029285002620928 | 48 | 132 |
2351 | 666020888022790149 | 530 | 2528 |
2352 rows × 3 columns
# Notebook display of the image-prediction DataFrame; the table below is its
# rendered output.
image_predictions
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 666020888022790149 | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
1 | 666029285002620928 | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
2 | 666033412701032449 | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
3 | 666044226329800704 | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
4 | 666049248165822465 | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
5 | 666050758794694657 | https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg | 1 | Bernese_mountain_dog | 0.651137 | True | English_springer | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True |
6 | 666051853826850816 | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | box_turtle | 0.933012 | False | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False |
7 | 666055525042405380 | https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg | 1 | chow | 0.692517 | True | Tibetan_mastiff | 0.058279 | True | fur_coat | 0.054449 | False |
8 | 666057090499244032 | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | shopping_cart | 0.962465 | False | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True |
9 | 666058600524156928 | https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg | 1 | miniature_poodle | 0.201493 | True | komondor | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True |
10 | 666063827256086533 | https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg | 1 | golden_retriever | 0.775930 | True | Tibetan_mastiff | 0.093718 | True | Labrador_retriever | 0.072427 | True |
11 | 666071193221509120 | https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg | 1 | Gordon_setter | 0.503672 | True | Yorkshire_terrier | 0.174201 | True | Pekinese | 0.109454 | True |
12 | 666073100786774016 | https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg | 1 | Walker_hound | 0.260857 | True | English_foxhound | 0.175382 | True | Ibizan_hound | 0.097471 | True |
13 | 666082916733198337 | https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg | 1 | pug | 0.489814 | True | bull_mastiff | 0.404722 | True | French_bulldog | 0.048960 | True |
14 | 666094000022159362 | https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg | 1 | bloodhound | 0.195217 | True | German_shepherd | 0.078260 | True | malinois | 0.075628 | True |
15 | 666099513787052032 | https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg | 1 | Lhasa | 0.582330 | True | Shih-Tzu | 0.166192 | True | Dandie_Dinmont | 0.089688 | True |
16 | 666102155909144576 | https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg | 1 | English_setter | 0.298617 | True | Newfoundland | 0.149842 | True | borzoi | 0.133649 | True |
17 | 666104133288665088 | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | hen | 0.965932 | False | cock | 0.033919 | False | partridge | 0.000052 | False |
18 | 666268910803644416 | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | desktop_computer | 0.086502 | False | desk | 0.085547 | False | bookcase | 0.079480 | False |
19 | 666273097616637952 | https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg | 1 | Italian_greyhound | 0.176053 | True | toy_terrier | 0.111884 | True | basenji | 0.111152 | True |
20 | 666287406224695296 | https://pbs.twimg.com/media/CT8g3BpUEAAuFjg.jpg | 1 | Maltese_dog | 0.857531 | True | toy_poodle | 0.063064 | True | miniature_poodle | 0.025581 | True |
21 | 666293911632134144 | https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg | 1 | three-toed_sloth | 0.914671 | False | otter | 0.015250 | False | great_grey_owl | 0.013207 | False |
22 | 666337882303524864 | https://pbs.twimg.com/media/CT9OwFIWEAMuRje.jpg | 1 | ox | 0.416669 | False | Newfoundland | 0.278407 | True | groenendael | 0.102643 | True |
23 | 666345417576210432 | https://pbs.twimg.com/media/CT9Vn7PWoAA_ZCM.jpg | 1 | golden_retriever | 0.858744 | True | Chesapeake_Bay_retriever | 0.054787 | True | Labrador_retriever | 0.014241 | True |
24 | 666353288456101888 | https://pbs.twimg.com/media/CT9cx0tUEAAhNN_.jpg | 1 | malamute | 0.336874 | True | Siberian_husky | 0.147655 | True | Eskimo_dog | 0.093412 | True |
25 | 666362758909284353 | https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg | 1 | guinea_pig | 0.996496 | False | skunk | 0.002402 | False | hamster | 0.000461 | False |
26 | 666373753744588802 | https://pbs.twimg.com/media/CT9vZEYWUAAlZ05.jpg | 1 | soft-coated_wheaten_terrier | 0.326467 | True | Afghan_hound | 0.259551 | True | briard | 0.206803 | True |
27 | 666396247373291520 | https://pbs.twimg.com/media/CT-D2ZHWIAA3gK1.jpg | 1 | Chihuahua | 0.978108 | True | toy_terrier | 0.009397 | True | papillon | 0.004577 | True |
28 | 666407126856765440 | https://pbs.twimg.com/media/CT-NvwmW4AAugGZ.jpg | 1 | black-and-tan_coonhound | 0.529139 | True | bloodhound | 0.244220 | True | flat-coated_retriever | 0.173810 | True |
29 | 666411507551481857 | https://pbs.twimg.com/media/CT-RugiWIAELEaq.jpg | 1 | coho | 0.404640 | False | barracouta | 0.271485 | False | gar | 0.189945 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2045 | 886366144734445568 | https://pbs.twimg.com/media/DE0BTnQUwAApKEH.jpg | 1 | French_bulldog | 0.999201 | True | Chihuahua | 0.000361 | True | Boston_bull | 0.000076 | True |
2046 | 886680336477933568 | https://pbs.twimg.com/media/DE4fEDzWAAAyHMM.jpg | 1 | convertible | 0.738995 | False | sports_car | 0.139952 | False | car_wheel | 0.044173 | False |
2047 | 886736880519319552 | https://pbs.twimg.com/media/DE5Se8FXcAAJFx4.jpg | 1 | kuvasz | 0.309706 | True | Great_Pyrenees | 0.186136 | True | Dandie_Dinmont | 0.086346 | True |
2048 | 886983233522544640 | https://pbs.twimg.com/media/DE8yicJW0AAAvBJ.jpg | 2 | Chihuahua | 0.793469 | True | toy_terrier | 0.143528 | True | can_opener | 0.032253 | False |
2049 | 887101392804085760 | https://pbs.twimg.com/media/DE-eAq6UwAA-jaE.jpg | 1 | Samoyed | 0.733942 | True | Eskimo_dog | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True |
2050 | 887343217045368832 | https://pbs.twimg.com/ext_tw_video_thumb/887343120832229379/pu/img/6HSuFrW1lzI_9Mht.jpg | 1 | Mexican_hairless | 0.330741 | True | sea_lion | 0.275645 | False | Weimaraner | 0.134203 | True |
2051 | 887473957103951883 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2052 | 887517139158093824 | https://pbs.twimg.com/ext_tw_video_thumb/887517108413886465/pu/img/WanJKwssZj4VJvL9.jpg | 1 | limousine | 0.130432 | False | tow_truck | 0.029175 | False | shopping_cart | 0.026321 | False |
2053 | 887705289381826560 | https://pbs.twimg.com/media/DFHDQBbXgAEqY7t.jpg | 1 | basset | 0.821664 | True | redbone | 0.087582 | True | Weimaraner | 0.026236 | True |
2054 | 888078434458587136 | https://pbs.twimg.com/media/DFMWn56WsAAkA7B.jpg | 1 | French_bulldog | 0.995026 | True | pug | 0.000932 | True | bull_mastiff | 0.000903 | True |
2055 | 888202515573088257 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2056 | 888554962724278272 | https://pbs.twimg.com/media/DFTH_O-UQAACu20.jpg | 3 | Siberian_husky | 0.700377 | True | Eskimo_dog | 0.166511 | True | malamute | 0.111411 | True |
2057 | 888804989199671297 | https://pbs.twimg.com/media/DFWra-3VYAA2piG.jpg | 1 | golden_retriever | 0.469760 | True | Labrador_retriever | 0.184172 | True | English_setter | 0.073482 | True |
2058 | 888917238123831296 | https://pbs.twimg.com/media/DFYRgsOUQAARGhO.jpg | 1 | golden_retriever | 0.714719 | True | Tibetan_mastiff | 0.120184 | True | Labrador_retriever | 0.105506 | True |
2059 | 889278841981685760 | https://pbs.twimg.com/ext_tw_video_thumb/889278779352338437/pu/img/VlbFB3v8H8VwzVNY.jpg | 1 | whippet | 0.626152 | True | borzoi | 0.194742 | True | Saluki | 0.027351 | True |
2060 | 889531135344209921 | https://pbs.twimg.com/media/DFg_2PVW0AEHN3p.jpg | 1 | golden_retriever | 0.953442 | True | Labrador_retriever | 0.013834 | True | redbone | 0.007958 | True |
2061 | 889638837579907072 | https://pbs.twimg.com/media/DFihzFfXsAYGDPR.jpg | 1 | French_bulldog | 0.991650 | True | boxer | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True |
2062 | 889665388333682689 | https://pbs.twimg.com/media/DFi579UWsAAatzw.jpg | 1 | Pembroke | 0.966327 | True | Cardigan | 0.027356 | True | basenji | 0.004633 | True |
2063 | 889880896479866881 | https://pbs.twimg.com/media/DFl99B1WsAITKsg.jpg | 1 | French_bulldog | 0.377417 | True | Labrador_retriever | 0.151317 | True | muzzle | 0.082981 | False |
2064 | 890006608113172480 | https://pbs.twimg.com/media/DFnwSY4WAAAMliS.jpg | 1 | Samoyed | 0.957979 | True | Pomeranian | 0.013884 | True | chow | 0.008167 | True |
2065 | 890240255349198849 | https://pbs.twimg.com/media/DFrEyVuW0AAO3t9.jpg | 1 | Pembroke | 0.511319 | True | Cardigan | 0.451038 | True | Chihuahua | 0.029248 | True |
2066 | 890609185150312448 | https://pbs.twimg.com/media/DFwUU__XcAEpyXI.jpg | 1 | Irish_terrier | 0.487574 | True | Irish_setter | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True |
2067 | 890729181411237888 | https://pbs.twimg.com/media/DFyBahAVwAAhUTd.jpg | 2 | Pomeranian | 0.566142 | True | Eskimo_dog | 0.178406 | True | Pembroke | 0.076507 | True |
2068 | 890971913173991426 | https://pbs.twimg.com/media/DF1eOmZXUAALUcq.jpg | 1 | Appenzeller | 0.341703 | True | Border_collie | 0.199287 | True | ice_lolly | 0.193548 | False |
2069 | 891087950875897856 | https://pbs.twimg.com/media/DF3HwyEWsAABqE6.jpg | 1 | Chesapeake_Bay_retriever | 0.425595 | True | Irish_terrier | 0.116317 | True | Indian_elephant | 0.076902 | False |
2070 | 891327558926688256 | https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg | 2 | basset | 0.555712 | True | English_springer | 0.225770 | True | German_short-haired_pointer | 0.175219 | True |
2071 | 891689557279858688 | https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg | 1 | paper_towel | 0.170278 | False | Labrador_retriever | 0.168086 | True | spatula | 0.040836 | False |
2072 | 891815181378084864 | https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg | 1 | Chihuahua | 0.716012 | True | malamute | 0.078253 | True | kelpie | 0.031379 | True |
2073 | 892177421306343426 | https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg | 1 | Chihuahua | 0.323581 | True | Pekinese | 0.090647 | True | papillon | 0.068957 | True |
2074 | 892420643555336193 | https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg | 1 | orange | 0.097049 | False | bagel | 0.085851 | False | banana | 0.076110 | False |
2075 rows × 12 columns
# Summarize the archive table: entry count plus per-column non-null counts and dtypes.
twitter_archive_enhanced.info()
RangeIndex: 2356 entries, 0 to 2355
Data columns (total 17 columns):
tweet_id 2356 non-null int64
in_reply_to_status_id 78 non-null float64
in_reply_to_user_id 78 non-null float64
timestamp 2356 non-null object
source 2356 non-null object
text 2356 non-null object
retweeted_status_id 181 non-null float64
retweeted_status_user_id 181 non-null float64
retweeted_status_timestamp 181 non-null object
expanded_urls 2297 non-null object
rating_numerator 2356 non-null int64
rating_denominator 2356 non-null int64
name 2356 non-null object
doggo 2356 non-null object
floofer 2356 non-null object
pupper 2356 non-null object
puppo 2356 non-null object
dtypes: float64(4), int64(3), object(10)
memory usage: 313.0+ KB
# Summarize the retweet/favorite table: entry count plus per-column non-null counts and dtypes.
extra_data.info()
RangeIndex: 2352 entries, 0 to 2351
Data columns (total 3 columns):
tweet_id 2352 non-null object
retweet_count 2352 non-null int64
favorite_count 2352 non-null int64
dtypes: int64(2), object(1)
memory usage: 55.2+ KB
# Tally how many jpg_url entries are missing (True) versus present (False).
image_predictions["jpg_url"].isnull().value_counts()
False 2075
Name: jpg_url, dtype: int64
# Display the image-prediction table for visual inspection.
image_predictions
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 666020888022790149 | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
1 | 666029285002620928 | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
2 | 666033412701032449 | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
3 | 666044226329800704 | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
4 | 666049248165822465 | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
5 | 666050758794694657 | https://pbs.twimg.com/media/CT5Jof1WUAEuVxN.jpg | 1 | Bernese_mountain_dog | 0.651137 | True | English_springer | 0.263788 | True | Greater_Swiss_Mountain_dog | 0.016199 | True |
6 | 666051853826850816 | https://pbs.twimg.com/media/CT5KoJ1WoAAJash.jpg | 1 | box_turtle | 0.933012 | False | mud_turtle | 0.045885 | False | terrapin | 0.017885 | False |
7 | 666055525042405380 | https://pbs.twimg.com/media/CT5N9tpXIAAifs1.jpg | 1 | chow | 0.692517 | True | Tibetan_mastiff | 0.058279 | True | fur_coat | 0.054449 | False |
8 | 666057090499244032 | https://pbs.twimg.com/media/CT5PY90WoAAQGLo.jpg | 1 | shopping_cart | 0.962465 | False | shopping_basket | 0.014594 | False | golden_retriever | 0.007959 | True |
9 | 666058600524156928 | https://pbs.twimg.com/media/CT5Qw94XAAA_2dP.jpg | 1 | miniature_poodle | 0.201493 | True | komondor | 0.192305 | True | soft-coated_wheaten_terrier | 0.082086 | True |
10 | 666063827256086533 | https://pbs.twimg.com/media/CT5Vg_wXIAAXfnj.jpg | 1 | golden_retriever | 0.775930 | True | Tibetan_mastiff | 0.093718 | True | Labrador_retriever | 0.072427 | True |
11 | 666071193221509120 | https://pbs.twimg.com/media/CT5cN_3WEAAlOoZ.jpg | 1 | Gordon_setter | 0.503672 | True | Yorkshire_terrier | 0.174201 | True | Pekinese | 0.109454 | True |
12 | 666073100786774016 | https://pbs.twimg.com/media/CT5d9DZXAAALcwe.jpg | 1 | Walker_hound | 0.260857 | True | English_foxhound | 0.175382 | True | Ibizan_hound | 0.097471 | True |
13 | 666082916733198337 | https://pbs.twimg.com/media/CT5m4VGWEAAtKc8.jpg | 1 | pug | 0.489814 | True | bull_mastiff | 0.404722 | True | French_bulldog | 0.048960 | True |
14 | 666094000022159362 | https://pbs.twimg.com/media/CT5w9gUW4AAsBNN.jpg | 1 | bloodhound | 0.195217 | True | German_shepherd | 0.078260 | True | malinois | 0.075628 | True |
15 | 666099513787052032 | https://pbs.twimg.com/media/CT51-JJUEAA6hV8.jpg | 1 | Lhasa | 0.582330 | True | Shih-Tzu | 0.166192 | True | Dandie_Dinmont | 0.089688 | True |
16 | 666102155909144576 | https://pbs.twimg.com/media/CT54YGiWUAEZnoK.jpg | 1 | English_setter | 0.298617 | True | Newfoundland | 0.149842 | True | borzoi | 0.133649 | True |
17 | 666104133288665088 | https://pbs.twimg.com/media/CT56LSZWoAAlJj2.jpg | 1 | hen | 0.965932 | False | cock | 0.033919 | False | partridge | 0.000052 | False |
18 | 666268910803644416 | https://pbs.twimg.com/media/CT8QCd1WEAADXws.jpg | 1 | desktop_computer | 0.086502 | False | desk | 0.085547 | False | bookcase | 0.079480 | False |
19 | 666273097616637952 | https://pbs.twimg.com/media/CT8T1mtUwAA3aqm.jpg | 1 | Italian_greyhound | 0.176053 | True | toy_terrier | 0.111884 | True | basenji | 0.111152 | True |
20 | 666287406224695296 | https://pbs.twimg.com/media/CT8g3BpUEAAuFjg.jpg | 1 | Maltese_dog | 0.857531 | True | toy_poodle | 0.063064 | True | miniature_poodle | 0.025581 | True |
21 | 666293911632134144 | https://pbs.twimg.com/media/CT8mx7KW4AEQu8N.jpg | 1 | three-toed_sloth | 0.914671 | False | otter | 0.015250 | False | great_grey_owl | 0.013207 | False |
22 | 666337882303524864 | https://pbs.twimg.com/media/CT9OwFIWEAMuRje.jpg | 1 | ox | 0.416669 | False | Newfoundland | 0.278407 | True | groenendael | 0.102643 | True |
23 | 666345417576210432 | https://pbs.twimg.com/media/CT9Vn7PWoAA_ZCM.jpg | 1 | golden_retriever | 0.858744 | True | Chesapeake_Bay_retriever | 0.054787 | True | Labrador_retriever | 0.014241 | True |
24 | 666353288456101888 | https://pbs.twimg.com/media/CT9cx0tUEAAhNN_.jpg | 1 | malamute | 0.336874 | True | Siberian_husky | 0.147655 | True | Eskimo_dog | 0.093412 | True |
25 | 666362758909284353 | https://pbs.twimg.com/media/CT9lXGsUcAAyUFt.jpg | 1 | guinea_pig | 0.996496 | False | skunk | 0.002402 | False | hamster | 0.000461 | False |
26 | 666373753744588802 | https://pbs.twimg.com/media/CT9vZEYWUAAlZ05.jpg | 1 | soft-coated_wheaten_terrier | 0.326467 | True | Afghan_hound | 0.259551 | True | briard | 0.206803 | True |
27 | 666396247373291520 | https://pbs.twimg.com/media/CT-D2ZHWIAA3gK1.jpg | 1 | Chihuahua | 0.978108 | True | toy_terrier | 0.009397 | True | papillon | 0.004577 | True |
28 | 666407126856765440 | https://pbs.twimg.com/media/CT-NvwmW4AAugGZ.jpg | 1 | black-and-tan_coonhound | 0.529139 | True | bloodhound | 0.244220 | True | flat-coated_retriever | 0.173810 | True |
29 | 666411507551481857 | https://pbs.twimg.com/media/CT-RugiWIAELEaq.jpg | 1 | coho | 0.404640 | False | barracouta | 0.271485 | False | gar | 0.189945 | False |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2045 | 886366144734445568 | https://pbs.twimg.com/media/DE0BTnQUwAApKEH.jpg | 1 | French_bulldog | 0.999201 | True | Chihuahua | 0.000361 | True | Boston_bull | 0.000076 | True |
2046 | 886680336477933568 | https://pbs.twimg.com/media/DE4fEDzWAAAyHMM.jpg | 1 | convertible | 0.738995 | False | sports_car | 0.139952 | False | car_wheel | 0.044173 | False |
2047 | 886736880519319552 | https://pbs.twimg.com/media/DE5Se8FXcAAJFx4.jpg | 1 | kuvasz | 0.309706 | True | Great_Pyrenees | 0.186136 | True | Dandie_Dinmont | 0.086346 | True |
2048 | 886983233522544640 | https://pbs.twimg.com/media/DE8yicJW0AAAvBJ.jpg | 2 | Chihuahua | 0.793469 | True | toy_terrier | 0.143528 | True | can_opener | 0.032253 | False |
2049 | 887101392804085760 | https://pbs.twimg.com/media/DE-eAq6UwAA-jaE.jpg | 1 | Samoyed | 0.733942 | True | Eskimo_dog | 0.035029 | True | Staffordshire_bullterrier | 0.029705 | True |
2050 | 887343217045368832 | https://pbs.twimg.com/ext_tw_video_thumb/887343120832229379/pu/img/6HSuFrW1lzI_9Mht.jpg | 1 | Mexican_hairless | 0.330741 | True | sea_lion | 0.275645 | False | Weimaraner | 0.134203 | True |
2051 | 887473957103951883 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2052 | 887517139158093824 | https://pbs.twimg.com/ext_tw_video_thumb/887517108413886465/pu/img/WanJKwssZj4VJvL9.jpg | 1 | limousine | 0.130432 | False | tow_truck | 0.029175 | False | shopping_cart | 0.026321 | False |
2053 | 887705289381826560 | https://pbs.twimg.com/media/DFHDQBbXgAEqY7t.jpg | 1 | basset | 0.821664 | True | redbone | 0.087582 | True | Weimaraner | 0.026236 | True |
2054 | 888078434458587136 | https://pbs.twimg.com/media/DFMWn56WsAAkA7B.jpg | 1 | French_bulldog | 0.995026 | True | pug | 0.000932 | True | bull_mastiff | 0.000903 | True |
2055 | 888202515573088257 | https://pbs.twimg.com/media/DFDw2tyUQAAAFke.jpg | 2 | Pembroke | 0.809197 | True | Rhodesian_ridgeback | 0.054950 | True | beagle | 0.038915 | True |
2056 | 888554962724278272 | https://pbs.twimg.com/media/DFTH_O-UQAACu20.jpg | 3 | Siberian_husky | 0.700377 | True | Eskimo_dog | 0.166511 | True | malamute | 0.111411 | True |
2057 | 888804989199671297 | https://pbs.twimg.com/media/DFWra-3VYAA2piG.jpg | 1 | golden_retriever | 0.469760 | True | Labrador_retriever | 0.184172 | True | English_setter | 0.073482 | True |
2058 | 888917238123831296 | https://pbs.twimg.com/media/DFYRgsOUQAARGhO.jpg | 1 | golden_retriever | 0.714719 | True | Tibetan_mastiff | 0.120184 | True | Labrador_retriever | 0.105506 | True |
2059 | 889278841981685760 | https://pbs.twimg.com/ext_tw_video_thumb/889278779352338437/pu/img/VlbFB3v8H8VwzVNY.jpg | 1 | whippet | 0.626152 | True | borzoi | 0.194742 | True | Saluki | 0.027351 | True |
2060 | 889531135344209921 | https://pbs.twimg.com/media/DFg_2PVW0AEHN3p.jpg | 1 | golden_retriever | 0.953442 | True | Labrador_retriever | 0.013834 | True | redbone | 0.007958 | True |
2061 | 889638837579907072 | https://pbs.twimg.com/media/DFihzFfXsAYGDPR.jpg | 1 | French_bulldog | 0.991650 | True | boxer | 0.002129 | True | Staffordshire_bullterrier | 0.001498 | True |
2062 | 889665388333682689 | https://pbs.twimg.com/media/DFi579UWsAAatzw.jpg | 1 | Pembroke | 0.966327 | True | Cardigan | 0.027356 | True | basenji | 0.004633 | True |
2063 | 889880896479866881 | https://pbs.twimg.com/media/DFl99B1WsAITKsg.jpg | 1 | French_bulldog | 0.377417 | True | Labrador_retriever | 0.151317 | True | muzzle | 0.082981 | False |
2064 | 890006608113172480 | https://pbs.twimg.com/media/DFnwSY4WAAAMliS.jpg | 1 | Samoyed | 0.957979 | True | Pomeranian | 0.013884 | True | chow | 0.008167 | True |
2065 | 890240255349198849 | https://pbs.twimg.com/media/DFrEyVuW0AAO3t9.jpg | 1 | Pembroke | 0.511319 | True | Cardigan | 0.451038 | True | Chihuahua | 0.029248 | True |
2066 | 890609185150312448 | https://pbs.twimg.com/media/DFwUU__XcAEpyXI.jpg | 1 | Irish_terrier | 0.487574 | True | Irish_setter | 0.193054 | True | Chesapeake_Bay_retriever | 0.118184 | True |
2067 | 890729181411237888 | https://pbs.twimg.com/media/DFyBahAVwAAhUTd.jpg | 2 | Pomeranian | 0.566142 | True | Eskimo_dog | 0.178406 | True | Pembroke | 0.076507 | True |
2068 | 890971913173991426 | https://pbs.twimg.com/media/DF1eOmZXUAALUcq.jpg | 1 | Appenzeller | 0.341703 | True | Border_collie | 0.199287 | True | ice_lolly | 0.193548 | False |
2069 | 891087950875897856 | https://pbs.twimg.com/media/DF3HwyEWsAABqE6.jpg | 1 | Chesapeake_Bay_retriever | 0.425595 | True | Irish_terrier | 0.116317 | True | Indian_elephant | 0.076902 | False |
2070 | 891327558926688256 | https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg | 2 | basset | 0.555712 | True | English_springer | 0.225770 | True | German_short-haired_pointer | 0.175219 | True |
2071 | 891689557279858688 | https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg | 1 | paper_towel | 0.170278 | False | Labrador_retriever | 0.168086 | True | spatula | 0.040836 | False |
2072 | 891815181378084864 | https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg | 1 | Chihuahua | 0.716012 | True | malamute | 0.078253 | True | kelpie | 0.031379 | True |
2073 | 892177421306343426 | https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg | 1 | Chihuahua | 0.323581 | True | Pekinese | 0.090647 | True | papillon | 0.068957 | True |
2074 | 892420643555336193 | https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg | 1 | orange | 0.097049 | False | bagel | 0.085851 | False | banana | 0.076110 | False |
2075 rows × 12 columns
# Descriptive statistics for the numeric columns of the archive table.
twitter_archive_enhanced.describe()
tweet_id | in_reply_to_status_id | in_reply_to_user_id | retweeted_status_id | retweeted_status_user_id | rating_numerator | rating_denominator | |
---|---|---|---|---|---|---|---|
count | 2.356000e+03 | 7.800000e+01 | 7.800000e+01 | 1.810000e+02 | 1.810000e+02 | 2356.000000 | 2356.000000 |
mean | 7.427716e+17 | 7.455079e+17 | 2.014171e+16 | 7.720400e+17 | 1.241698e+16 | 13.126486 | 10.455433 |
std | 6.856705e+16 | 7.582492e+16 | 1.252797e+17 | 6.236928e+16 | 9.599254e+16 | 45.876648 | 6.745237 |
min | 6.660209e+17 | 6.658147e+17 | 1.185634e+07 | 6.661041e+17 | 7.832140e+05 | 0.000000 | 0.000000 |
25% | 6.783989e+17 | 6.757419e+17 | 3.086374e+08 | 7.186315e+17 | 4.196984e+09 | 10.000000 | 10.000000 |
50% | 7.196279e+17 | 7.038708e+17 | 4.196984e+09 | 7.804657e+17 | 4.196984e+09 | 11.000000 | 10.000000 |
75% | 7.993373e+17 | 8.257804e+17 | 4.196984e+09 | 8.203146e+17 | 4.196984e+09 | 12.000000 | 10.000000 |
max | 8.924206e+17 | 8.862664e+17 | 8.405479e+17 | 8.874740e+17 | 7.874618e+17 | 1776.000000 | 170.000000 |
# Frequency of each distinct rating denominator in the archive.
twitter_archive_enhanced["rating_denominator"].value_counts()
10 2333
11 3
50 3
80 2
20 2
2 1
16 1
40 1
70 1
15 1
90 1
110 1
120 1
130 1
150 1
170 1
7 1
0 1
Name: rating_denominator, dtype: int64
# Frequency of each distinct value in the "name" column of the archive.
twitter_archive_enhanced["name"].value_counts()
None 745
a 55
Charlie 12
Cooper 11
Lucy 11
Oliver 11
Penny 10
Tucker 10
Lola 10
Winston 9
Bo 9
Sadie 8
the 8
Toby 7
Daisy 7
Bailey 7
Buddy 7
an 7
Scout 6
Dave 6
Bella 6
Rusty 6
Koda 6
Oscar 6
Stanley 6
Jax 6
Jack 6
Leo 6
Milo 6
Phil 5
...
Dunkin 1
Crimson 1
Brian 1
Randall 1
Snoopy 1
Puff 1
Sid 1
Huck 1
Pete 1
Antony 1
Stephanus 1
Striker 1
Rizzo 1
Marvin 1
Perry 1
Cleopatricia 1
Siba 1
Rontu 1
Boston 1
Filup 1
Deacon 1
Anna 1
Dudley 1
Jangle 1
Dallas 1
Emma 1
Izzy 1
Rascal 1
Willow 1
Alf 1
Name: name, Length: 957, dtype: int64
# List archive rows whose tweet_id repeats that of an earlier row.
twitter_archive_enhanced.loc[twitter_archive_enhanced["tweet_id"].duplicated()]
tweet_id | in_reply_to_status_id | in_reply_to_user_id | timestamp | source | text | retweeted_status_id | retweeted_status_user_id | retweeted_status_timestamp | expanded_urls | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo |
---|
# Descriptive statistics for the numeric columns of the retweet/favorite table.
extra_data.describe()
retweet_count | favorite_count | |
---|---|---|
count | 2352.000000 | 2352.000000 |
mean | 3134.932398 | 8109.198980 |
std | 5237.846296 | 11980.795669 |
min | 0.000000 | 0.000000 |
25% | 618.000000 | 1417.000000 |
50% | 1456.500000 | 3596.500000 |
75% | 3628.750000 | 10118.000000 |
max | 79116.000000 | 132318.000000 |
# Descriptive statistics for the numeric columns of the image-prediction table.
image_predictions.describe()
tweet_id | img_num | p1_conf | p2_conf | p3_conf | |
---|---|---|---|---|---|
count | 2.075000e+03 | 2075.000000 | 2075.000000 | 2.075000e+03 | 2.075000e+03 |
mean | 7.384514e+17 | 1.203855 | 0.594548 | 1.345886e-01 | 6.032417e-02 |
std | 6.785203e+16 | 0.561875 | 0.271174 | 1.006657e-01 | 5.090593e-02 |
min | 6.660209e+17 | 1.000000 | 0.044333 | 1.011300e-08 | 1.740170e-10 |
25% | 6.764835e+17 | 1.000000 | 0.364412 | 5.388625e-02 | 1.622240e-02 |
50% | 7.119988e+17 | 1.000000 | 0.588230 | 1.181810e-01 | 4.944380e-02 |
75% | 7.932034e+17 | 1.000000 | 0.843855 | 1.955655e-01 | 9.180755e-02 |
max | 8.924206e+17 | 4.000000 | 1.000000 | 4.880140e-01 | 2.734190e-01 |
# List prediction rows whose tweet_id repeats that of an earlier row.
image_predictions.loc[image_predictions["tweet_id"].duplicated()]
tweet_id | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog |
---|
- `twitter_archive_enhanced` 表:
  - 如下列数据缺失(`in_reply_to_status_id`、`in_reply_to_user_id`、`retweeted_status_id`、`retweeted_status_user_id`、`retweeted_status_timestamp`、`expanded_urls`)
  - 分母有不为10的数据(`rating_denominator` 列)
  - `rating_numerator` 列中,数据提取有误,如:11.27/10 分子提取的是27,11.26/10 分子提取的是26(得到 26/10)
  - 数据类型不合适(`tweet_id` 和 `timestamp` 列)
- `image_predictions` 表:数据类型不合适(`tweet_id` 列)
- `twitter_archive_enhanced` 表:`doggo`、`floofer`、`pupper`、`puppo` 四个列标题是值

# 创建各表的副本
# Clean on copies so the three source tables stay untouched.
twitter_archive_enhanced_clean, extra_data_clean, image_predictions_clean = (
    frame.copy()
    for frame in (twitter_archive_enhanced, extra_data, image_predictions)
)
`twitter_archive_enhanced` 数据集中存在转发的条目及无图片的推特(项目动机中要求我们只需要含有图片的原始评级,不包括转发)。
- 通过 `isnull()` 函数筛选出 `retweeted_status_id`、`retweeted_status_user_id` 和 `retweeted_status_timestamp` 这三列为空值的行;
- 通过 `notnull()` 函数筛选出 `expanded_urls` 列不为空值的行。
# Keep only original tweets that carry a media link.
# A row counts as an original (non-retweet) when all three retweeted_status_*
# columns are null; it is treated as having a picture when expanded_urls is
# not null.
is_original = (
    twitter_archive_enhanced_clean.retweeted_status_id.isnull()
    & twitter_archive_enhanced_clean.retweeted_status_user_id.isnull()
    & twitter_archive_enhanced_clean.retweeted_status_timestamp.isnull()
)
has_media_url = twitter_archive_enhanced_clean.expanded_urls.notnull()
# .copy() detaches the filtered rows from the frame they were sliced from, so
# the in-place drop and the column assignments performed later on this frame
# do not raise SettingWithCopyWarning.
twitter_archive_enhanced_clean = twitter_archive_enhanced_clean[is_original & has_media_url].copy()
# Check entry count and non-null counts after removing retweets and rows without a URL.
twitter_archive_enhanced_clean.info()
Int64Index: 2117 entries, 0 to 2355
Data columns (total 17 columns):
tweet_id 2117 non-null int64
in_reply_to_status_id 23 non-null float64
in_reply_to_user_id 23 non-null float64
timestamp 2117 non-null object
source 2117 non-null object
text 2117 non-null object
retweeted_status_id 0 non-null float64
retweeted_status_user_id 0 non-null float64
retweeted_status_timestamp 0 non-null object
expanded_urls 2117 non-null object
rating_numerator 2117 non-null int64
rating_denominator 2117 non-null int64
name 2117 non-null object
doggo 2117 non-null object
floofer 2117 non-null object
pupper 2117 non-null object
puppo 2117 non-null object
dtypes: float64(4), int64(3), object(10)
memory usage: 297.7+ KB
`twitter_archive_enhanced`:如下列数据缺失(`in_reply_to_status_id`、`in_reply_to_user_id`、`retweeted_status_id`、`retweeted_status_user_id`、`retweeted_status_timestamp`、`expanded_urls`)。
这几列数据虽然缺失,但是对于我们之后的分析没有多大意义,可以通过 `.drop` 函数将其删除,`source` 列也可以删除掉。
# Columns judged to add nothing to the later analysis (reply/retweet metadata,
# source and expanded_urls).
columns_to_discard = [
    "in_reply_to_status_id",
    "in_reply_to_user_id",
    "retweeted_status_id",
    "source",
    "retweeted_status_user_id",
    "retweeted_status_timestamp",
    "expanded_urls",
]
# axis=1 targets columns; inplace=True modifies the frame itself instead of
# returning a new one.
twitter_archive_enhanced_clean.drop(columns_to_discard, axis=1, inplace=True)
# Preview the first rows to confirm the columns were dropped.
twitter_archive_enhanced_clean.head()
tweet_id | timestamp | text | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | |
---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | 13 | 10 | Phineas | None | None | None | None |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | 13 | 10 | Tilly | None | None | None | None |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | 12 | 10 | Archie | None | None | None | None |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | 13 | 10 | Darla | None | None | None | None |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | 12 | 10 | Franklin | None | None | None | None |
`twitter_archive_enhanced`:狗狗评分数据不完整,分母有不为10的数据(`rating_denominator` 列),`text` 列中有多个狗狗的评分数据。
重新提取:通过 `str.findall` 函数使用正则表达式从 `text` 列中提取狗狗评分。
#twitter_archive_enhanced_clean['rate'] = twitter_archive_enhanced_clean.text.str.extract('(\d+\.?\d+\/10)',expand=True)
#def extract_num(string):
#rating = []
#string = string.split()
#for x in string:
#match = re.search(r'\d+\.?\d+\/d+',x)
#if match:
#rating.append(match.group())
#return rating
#twitter_archive_enhanced_clean.text.apply(extract_num)
## 如何让多个列的值连接到一起
#for string in twitter_archive_enhanced_clean.text:
#extract_num(string)
# Extract every "<number>/10" rating from the tweet text with a regular
# expression and store the list of matches in the "rating" column.
# Pattern notes:
#   * (?:\d+\.)? optionally consumes an integer part plus the decimal point, so
#     "13/10", "5/10" and "11.27/10" are each captured whole.
#   * The earlier attempt (\d+\.?\d+\/10) missed single-digit numerators
#     because \d+ ... \d+ demands at least two digits before the slash.
#   * Only ratings written with "/10" are matched; a tweet without one gets an
#     empty list.
# The pattern is a raw string so the backslashes reach the regex engine
# unchanged and Python does not warn about invalid escape sequences.
twitter_archive_enhanced_clean['rating'] = twitter_archive_enhanced_clean.text.str.findall(r'((?:\d+\.)?\d+\/10)')
# Confirm the new "rating" column exists and is populated for every row.
twitter_archive_enhanced_clean.info()
Int64Index: 2117 entries, 0 to 2355
Data columns (total 11 columns):
tweet_id 2117 non-null int64
timestamp 2117 non-null object
text 2117 non-null object
rating_numerator 2117 non-null int64
rating_denominator 2117 non-null int64
name 2117 non-null object
doggo 2117 non-null object
floofer 2117 non-null object
pupper 2117 non-null object
puppo 2117 non-null object
rating 2117 non-null object
dtypes: int64(3), object(8)
memory usage: 198.5+ KB
# Inspect the extraction result; each entry is a list of matched ratings.
twitter_archive_enhanced_clean['rating']
0 [13/10]
1 [13/10]
2 [12/10]
3 [13/10]
4 [12/10]
5 [13/10]
6 [13/10]
7 [13/10]
8 [13/10]
9 [14/10]
10 [13/10]
11 [13/10]
12 [13/10]
13 [12/10]
14 [13/10]
15 [13/10]
16 [12/10]
17 [13/10]
18 [13/10]
20 [12/10]
21 [13/10]
22 [14/10]
23 [13/10]
24 [13/10]
25 [12/10]
26 [13/10]
27 [13/10]
28 [13/10]
29 [12/10]
31 [13/10]
...
2326 [2/10]
2327 [7/10]
2328 [9/10]
2329 [11/10]
2330 [6/10]
2331 [8/10]
2332 [10/10]
2333 [9/10]
2334 [3/10]
2335 [9/10]
2336 [11/10]
2337 [10/10]
2338 [1/10]
2339 [11/10]
2340 [8/10]
2341 [9/10]
2342 [6/10]
2343 [10/10]
2344 [9/10]
2345 [10/10]
2346 [8/10]
2347 [9/10]
2348 [10/10]
2349 [2/10]
2350 [10/10]
2351 [5/10]
2352 [6/10]
2353 [9/10]
2354 [7/10]
2355 [8/10]
Name: rating, Length: 2117, dtype: object
# Check whether any tweet yielded more than one rating: print each such list.
for rate in twitter_archive_enhanced_clean.rating:
    if len(rate) <= 1:
        continue
    print(rate)
['12/10', '11/10']
['10/10', '7/10']
['10/10', '8/10']
['9/10', '2/10']
['4/10', '13/10']
['10/10', '5/10']
['5/10', '10/10']
['10/10', '6/10']
['11/10', '10/10']
['10/10', '11/10']
['10/10', '7/10']
['10/10', '4/10']
['5/10', '8/10']
['8/10', '11/10']
['10/10', '7/10', '12/10']
['11/10', '8/10']
['11/10', '8/10']
['10/10', '7/10']
['8/10', '1/10']
['10/10', '4/10']
['7/10', '8/10']
['10/10', '10/10']
# Collapse each list of extracted ratings into one "&"-separated string; the
# operation is simple enough for an inline lambda passed to apply.
# Some tweets repeat the same rating (e.g. ['10/10', '10/10']), so duplicates
# are removed. dict.fromkeys keeps the first occurrence of every rating in the
# order it appears in the tweet, whereas joining a set emits them in an
# arbitrary order unrelated to the text (e.g. "10/10 and 7/10" came out as
# "7/10&10/10").
twitter_archive_enhanced_clean['rating'] = twitter_archive_enhanced_clean['rating'].apply(lambda x: "&".join(dict.fromkeys(x)))
twitter_archive_enhanced_clean['rating'].value_counts()
12/10 488
10/10 427
11/10 415
13/10 296
9/10 153
8/10 96
7/10 50
14/10 41
6/10 32
5/10 31
3/10 19
4/10 14
13
2/10 9
1/10 4
7/10&10/10 3
8/10&11/10 2
0/10 2
10/10&11/10 2
5/10&10/10 2
10/10&4/10 2
12/10&11/10 1
1776/10 1
11/10&8/10 1
13/10&4/10 1
9.75/10 1
13.5/10 1
12/10&7/10&10/10 1
6/10&10/10 1
11.26/10 1
11.27/10 1
420/10 1
1/10&8/10 1
7/10&8/10 1
5/10&8/10 1
10/10&8/10 1
9/10&2/10 1
Name: rating, dtype: int64
# Show the rows whose rating string is longer than 7 characters so that surplus
# ratings can be removed by hand, guided by the "text" column. Besides the
# "&"-joined ratings this also surfaces long single ratings such as 11.27/10.
twitter_archive_enhanced_clean.loc[twitter_archive_enhanced_clean['rating'].str.len() > 7]
tweet_id | timestamp | text | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|---|---|
763 | 778027034220126208 | 2016-09-20 00:24:34 +0000 | This is Sophie. She's a Jubilant Bush Pupper. Super h*ckin rare. Appears at random just to smile at the locals. 11.27/10 would smile back https://t.co/QFaUiIHxHq | 27 | 10 | Sophie | None | None | pupper | None | 11.27/10 |
766 | 777684233540206592 | 2016-09-19 01:42:24 +0000 | "Yep... just as I suspected. You're not flossing." 12/10 and 11/10 for the pup not flossing https://t.co/SuXcI9B7pQ | 12 | 10 | None | None | None | None | None | 12/10&11/10 |
1007 | 747600769478692864 | 2016-06-28 01:21:27 +0000 | This is Bookstore and Seaweed. Bookstore is tired and Seaweed is an asshole. 10/10 and 7/10 respectively https://t.co/eUGjGjjFVJ | 10 | 10 | Bookstore | None | None | None | None | 7/10&10/10 |
1222 | 714258258790387713 | 2016-03-28 01:10:13 +0000 | Meet Travis and Flurp. Travis is pretty chill but Flurp can't lie down properly. 10/10 & 8/10\nget it together Flurp https://t.co/Akzl5ynMmE | 10 | 10 | Travis | None | None | None | None | 10/10&8/10 |
1359 | 703356393781329922 | 2016-02-26 23:10:06 +0000 | This is Socks. That water pup w the super legs just splashed him. Socks did not appreciate that. 9/10 and 2/10 https://t.co/8rc5I22bBf | 9 | 10 | Socks | None | None | None | None | 9/10&2/10 |
1459 | 695064344191721472 | 2016-02-04 02:00:27 +0000 | This may be the greatest video I've ever been sent. 4/10 for Charles the puppy, 13/10 overall. (Vid by @stevenxx_) https://t.co/uaJmNgXR2P | 4 | 10 | None | None | None | None | None | 13/10&4/10 |
1465 | 694352839993344000 | 2016-02-02 02:53:12 +0000 | Meet Oliviér. He takes killer selfies. Has a dog of his own. It leaps at random & can't bark for shit. 10/10 & 5/10 https://t.co/6NgsQJuSBJ | 10 | 10 | Oliviér | None | None | None | None | 5/10&10/10 |
1508 | 691483041324204033 | 2016-01-25 04:49:38 +0000 | When bae says they can't go out but you see them with someone else that same night. 5/10 & 10/10 for heartbroken pup https://t.co/aenk0KpoWM | 5 | 10 | None | None | None | None | None | 5/10&10/10 |
1525 | 690400367696297985 | 2016-01-22 05:07:29 +0000 | This is Eriq. His friend just reminded him of last year's super bowl. Not cool friend\n10/10 for Eriq\n6/10 for friend https://t.co/PlEXTofdpf | 10 | 10 | Eriq | None | None | None | None | 6/10&10/10 |
1538 | 689835978131935233 | 2016-01-20 15:44:48 +0000 | Meet Fynn & Taco. Fynn is an all-powerful leaf lord and Taco is in the wrong place at the wrong time. 11/10 & 10/10 https://t.co/MuqHPvtL8c | 11 | 10 | Fynn | None | None | None | None | 10/10&11/10 |
1712 | 680494726643068929 | 2015-12-25 21:06:00 +0000 | Here we have uncovered an entire battalion of holiday puppers. Average of 11.26/10 https://t.co/eNm2S6p9BD | 26 | 10 | None | None | None | None | None | 11.26/10 |
1795 | 677314812125323265 | 2015-12-17 02:30:09 +0000 | Meet Tassy & Bee. Tassy is pretty chill, but Bee is convinced the Ruffles are haunted. 10/10 & 11/10 respectively https://t.co/fgORpmTN9C | 10 | 10 | Tassy | None | None | None | None | 10/10&11/10 |
1832 | 676191832485810177 | 2015-12-14 00:07:50 +0000 | These two pups just met and have instantly bonded. Spectacular scene. Mesmerizing af. 10/10 and 7/10 for blue dog https://t.co/gwryaJO4tC | 10 | 10 | None | None | None | None | None | 7/10&10/10 |
1897 | 674737130913071104 | 2015-12-09 23:47:22 +0000 | Meet Rufio. He is unaware of the pink legless pupper wrapped around him. Might want to get that checked 10/10 & 4/10 https://t.co/KNfLnYPmYh | 10 | 10 | Rufio | None | None | pupper | None | 10/10&4/10 |
1901 | 674646392044941312 | 2015-12-09 17:46:48 +0000 | Two gorgeous dogs here. Little waddling dog is a rebel. Refuses to look at camera. Must be a preteen. 5/10 & 8/10 https://t.co/YPfw7oahbD | 5 | 10 | None | None | None | None | None | 5/10&8/10 |
1970 | 673295268553605120 | 2015-12-06 00:17:55 +0000 | Meet Eve. She's a raging alcoholic 8/10 (would b 11/10 but pupper alcoholism is a tragic issue that I can't condone) https://t.co/U36HYQIijg | 8 | 10 | Eve | None | None | pupper | None | 11/10&8/10 |
2010 | 672248013293752320 | 2015-12-03 02:56:30 +0000 | 10/10 for dog. 7/10 for cat. 12/10 for human. Much skill. Would pet all https://t.co/uhx5gfpx5k | 10 | 10 | None | None | None | None | None | 12/10&7/10&10/10 |
2064 | 671154572044468225 | 2015-11-30 02:31:34 +0000 | Meet Holly. She's trying to teach small human-like pup about blocks but he's not paying attention smh. 11/10 & 8/10 https://t.co/RcksaUrGNu | 11 | 10 | Holly | None | None | None | None | 8/10&11/10 |
2113 | 670434127938719744 | 2015-11-28 02:48:46 +0000 | Meet Hank and Sully. Hank is very proud of the pumpkin they found and Sully doesn't give a shit. 11/10 and 8/10 https://t.co/cwoP1ftbrj | 11 | 10 | Hank | None | None | None | None | 8/10&11/10 |
2177 | 669037058363662336 | 2015-11-24 06:17:19 +0000 | Here we have Pancho and Peaches. Pancho is a Condoleezza Gryffindor, and Peaches is just an asshole. 10/10 & 7/10 https://t.co/Lh1BsJrWPp | 10 | 10 | None | None | None | None | None | 7/10&10/10 |
2216 | 668537837512433665 | 2015-11-22 21:13:35 +0000 | This is Spark. He's nervous. Other dog hasn't moved in a while. Won't come when called. Doesn't fetch well 8/10&1/10 https://t.co/stEodX9Aba | 8 | 10 | Spark | None | None | None | None | 1/10&8/10 |
2263 | 667544320556335104 | 2015-11-20 03:25:43 +0000 | This is Kial. Kial is either wearing a cape, which would be rad, or flashing us, which would be rude. 10/10 or 4/10 https://t.co/8zcwIoiuqR | 10 | 10 | Kial | None | None | None | None | 10/10&4/10 |
2272 | 667491009379606528 | 2015-11-19 23:53:52 +0000 | Two dogs in this one. Both are rare Jujitsu Pythagoreans. One slightly whiter than other. Long legs. 7/10 and 8/10 https://t.co/ITxxcc4v9y | 7 | 10 | None | None | None | None | None | 7/10&8/10 |
# Count the rows whose "rating" string is longer than 7 characters.
# Two of them are plain decimal ratings that are simply long, so only 21 rows
# actually need to be fixed by hand.
int((twitter_archive_enhanced_clean["rating"].str.len() > 7).sum())
23
# Manual clean-up: collapse each over-long rating string to a single rating.
# Keys are row labels of the DataFrame, values are the rating to keep.
manual_rating_by_row = {
    766: "11.5/10",
    1007: "8.6/10",
    1222: "9/10",
    1359: "9/10",
    1459: "4/10",
    1465: "10/10",
    1508: "5/10",
    1525: "6/10",
    1538: "10.5/10",
    1795: "10.5/10",
    1832: "8.5/10",
    1897: "4/10",
    1901: "6.5/10",
    1970: "8/10",
    2010: "10/10",
    2064: "8/10",
    2113: "9.5/10",
    2177: "8.5/10",
    2216: "8/10",
    2263: "7/10",
    2272: "7.5/10",
}
for row_label, fixed_rating in manual_rating_by_row.items():
    twitter_archive_enhanced_clean.loc[row_label, "rating"] = fixed_rating

# Show the rows that are still longer than 7 characters after the overrides.
twitter_archive_enhanced_clean[twitter_archive_enhanced_clean["rating"].str.len() > 7]
tweet_id | timestamp | text | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|---|---|
763 | 778027034220126208 | 2016-09-20 00:24:34 +0000 | This is Sophie. She's a Jubilant Bush Pupper. Super h*ckin rare. Appears at random just to smile at the locals. 11.27/10 would smile back https://t.co/QFaUiIHxHq | 27 | 10 | Sophie | None | None | pupper | None | 11.27/10 |
1712 | 680494726643068929 | 2015-12-25 21:06:00 +0000 | Here we have uncovered an entire battalion of holiday puppers. Average of 11.26/10 https://t.co/eNm2S6p9BD | 26 | 10 | None | None | None | None | None | 11.26/10 |
# Keep only the numerator (the text before the "/" separator) and convert it
# to a number; values that cannot be parsed become NaN.
twitter_archive_enhanced_clean["rating"] = pd.to_numeric(
    twitter_archive_enhanced_clean["rating"].str.split("/").str.get(0),
    errors="coerce",
)
# Rows whose "rating" is missing.
twitter_archive_enhanced_clean[twitter_archive_enhanced_clean["rating"].isnull()]
tweet_id | timestamp | text | rating_numerator | rating_denominator | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|---|---|
433 | 820690176645140481 | 2017-01-15 17:52:40 +0000 | The floofs have been released I repeat the floofs have been released. 84/70 https://t.co/NIYC820tmd | 84 | 70 | None | None | None | None | None | NaN |
516 | 810984652412424192 | 2016-12-19 23:06:23 +0000 | Meet Sam. She smiles 24/7 & secretly aspires to be a reindeer. \nKeep Sam smiling by clicking and sharing this link:\nhttps://t.co/98tB8y7y7t https://t.co/LouL5vdvxx | 24 | 7 | Sam | None | None | None | None | NaN |
902 | 758467244762497024 | 2016-07-28 01:00:57 +0000 | Why does this never happen at my front door... 165/150 https://t.co/HmwrdfEfUE | 165 | 150 | None | None | None | None | None | NaN |
1120 | 731156023742988288 | 2016-05-13 16:15:54 +0000 | Say hello to this unbelievably well behaved squad of doggos. 204/170 would try to pet all at once https://t.co/yGQI3He3xv | 204 | 170 | this | None | None | None | None | NaN |
1228 | 713900603437621249 | 2016-03-27 01:29:02 +0000 | Happy Saturday here's 9 puppers on a bench. 99/90 good work everybody https://t.co/mpvaVxKmc1 | 99 | 90 | None | None | None | None | None | NaN |
1254 | 710658690886586372 | 2016-03-18 02:46:49 +0000 | Here's a brigade of puppers. All look very prepared for whatever happens next. 80/80 https://t.co/0eb7R1Om12 | 80 | 80 | None | None | None | None | None | NaN |
1274 | 709198395643068416 | 2016-03-14 02:04:08 +0000 | From left to right:\nCletus, Jerome, Alejandro, Burp, & Titson\nNone know where camera is. 45/50 would hug all at once https://t.co/sedre1ivTK | 45 | 50 | None | None | None | None | None | NaN |
1351 | 704054845121142784 | 2016-02-28 21:25:30 +0000 | Here is a whole flock of puppers. 60/50 I'll take the lot https://t.co/9dpcw6MdWa | 60 | 50 | a | None | None | None | None | NaN |
1433 | 697463031882764288 | 2016-02-10 16:51:59 +0000 | Happy Wednesday here's a bucket of pups. 44/40 would pet all at once https://t.co/HppvrYuamZ | 44 | 40 | None | None | None | None | None | NaN |
1634 | 684225744407494656 | 2016-01-05 04:11:44 +0000 | Two sneaky puppers were not initially seen, moving the rating to 143/130. Please forgive us. Thank you https://t.co/kRK51Y5ac3 | 143 | 130 | None | None | None | None | None | NaN |
1635 | 684222868335505415 | 2016-01-05 04:00:18 +0000 | Someone help the girl is being mugged. Several are distracting her while two steal her shoes. Clever puppers 121/110 https://t.co/1zfnTJLt55 | 121 | 110 | None | None | None | None | None | NaN |
1779 | 677716515794329600 | 2015-12-18 05:06:23 +0000 | IT'S PUPPERGEDDON. Total of 144/120 ...I think https://t.co/ZanVtAtvIq | 144 | 120 | None | None | None | None | None | NaN |
1843 | 675853064436391936 | 2015-12-13 01:41:41 +0000 | Here we have an entire platoon of puppers. Total score: 88/80 would pet all at once https://t.co/y93p6FLvVw | 88 | 80 | None | None | None | None | None | NaN |
# The "rating" column replaces the two source columns, so remove them in place.
twitter_archive_enhanced_clean.drop(
    columns=["rating_numerator", "rating_denominator"],
    inplace=True,
)
twitter_archive_enhanced_clean.head()
tweet_id | timestamp | text | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | Phineas | None | None | None | None | 13.0 |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | Tilly | None | None | None | None | 13.0 |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | Archie | None | None | None | None | 12.0 |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | Darla | None | None | None | None | 13.0 |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | Franklin | None | None | None | None | 12.0 |
# Frequency of each distinct rating value.
twitter_archive_enhanced_clean["rating"].value_counts()
12.00 488
10.00 429
11.00 415
13.00 296
9.00 155
8.00 99
7.00 51
14.00 41
6.00 33
5.00 32
3.00 19
4.00 16
2.00 9
1.00 4
8.50 2
10.50 2
0.00 2
11.27 1
420.00 1
13.50 1
8.60 1
11.26 1
11.50 1
7.50 1
9.75 1
6.50 1
9.50 1
1776.00 1
Name: rating, dtype: int64
twitter_archive_enhanced
:狗狗姓名缺失,且存在 "a"、"the"、"an" 以及小写字母开头的单词(如 "quite"),需要重新提取:
通过str.extract
函数使用正则表达式从text
列中提取狗狗姓名
# Dog names usually follow lead-in phrases such as "hello to", "Meet" or
# "This is", and they start with a capital letter.
# The pattern uses a non-capturing group for the lead-in phrase and a lookahead
# so that the captured name stops right before the first period.
# A raw string is used so that "\s" and "\." reach the regex engine unchanged
# (in a normal string "\s" is an invalid escape sequence and triggers a warning).
# expand=False returns a Series, which is what a single-column assignment needs.
twitter_archive_enhanced_clean["name"] = twitter_archive_enhanced_clean["text"].str.extract(
    r"(?:This is|named|Meet|hello to|name is|Here we have|Here is)\s([A-Z].*?(?=\.))",
    expand=False,
)
twitter_archive_enhanced_clean["name"].value_counts()
Charlie 11
Cooper 10
Lucy 10
Tucker 9
Oliver 9
Lola 8
Penny 8
Winston 8
Daisy 7
Toby 7
Bailey 6
Bella 6
Stanley 6
Koda 6
Sadie 6
Oscar 5
Buddy 5
Dave 5
Leo 5
Bo 5
Jax 5
Scout 5
Louis 5
Rusty 5
Chip 4
Cassie 4
Finn 4
Milo 4
Gus 4
Duke 4
..
Travis and Flurp 1
Taco 1
Emma 1
Jangle 1
Jersey 1
Dudley 1
Moofasa 1
Hercules 1
Petrick 1
Yoda 1
Rooney 1
Fabio 1
Klein 1
Birf 1
Cheryl AKA Queen Pupper of the Skies 1
Huck 1
Antony 1
Stephanus 1
Lassie 1
Howard 1
Striker 1
Cali 1
Marvin 1
Perry 1
Cleopatricia 1
Siba 1
Pete 1
Boston 1
Deacon 1
Alf 1
Name: name, Length: 992, dtype: int64
# For tweets that name two dogs, replace the word "and" (with its surrounding
# whitespace) by the connector "&".
# regex=True is passed explicitly: since pandas 2.0 Series.str.replace treats
# the pattern as a literal string by default, which would silently skip this step.
twitter_archive_enhanced_clean["name"] = twitter_archive_enhanced_clean["name"].str.replace(
    r"\sand\s", "&", regex=True
)
# Likewise, decode the HTML entity "&amp;" carried over from the tweet text
# into a plain "&" (replacing "&" with "&" was a no-op).
twitter_archive_enhanced_clean["name"] = twitter_archive_enhanced_clean["name"].str.replace(
    "&amp;", "&", regex=False
)
# Manually clean the remaining special cases (literal text -> replacement).
for literal_text, replacement in [
    ("Gary, Carrie Fisher's dog", "Gary"),
    ("her 2 pups", " "),
    ("his son", " "),
    ("her son", " "),
    ("Zeke the Wonder Dog", "Zeke"),
]:
    twitter_archive_enhanced_clean["name"] = twitter_archive_enhanced_clean["name"].str.replace(
        literal_text, replacement, regex=False
    )
# Turn the placeholder string "None" into a real missing value; the previous
# code wrote the literal string "np.nan", which isnull() does not detect.
twitter_archive_enhanced_clean["name"] = twitter_archive_enhanced_clean["name"].replace("None", np.nan)
# Rows for which no name could be extracted.
twitter_archive_enhanced_clean[twitter_archive_enhanced_clean["name"].isnull()]
tweet_id | timestamp | text | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|
5 | 891087950875897856 | 2017-07-29 00:08:17 +0000 | Here we have a majestic great white breaching off South Africa's coast. Absolutely h*ckin breathtaking. 13/10 (IG: tucker_marlo) #BarkWeek https://t.co/kQ04fDDRmh | NaN | None | None | None | None | 13.0 |
7 | 890729181411237888 | 2017-07-28 00:22:40 +0000 | When you watch your owner call another dog a good boy but then they turn back to you and say you're a great boy. 13/10 https://t.co/v0nONBcwxq | NaN | None | None | None | None | 13.0 |
12 | 889665388333682689 | 2017-07-25 01:55:32 +0000 | Here's a puppo that seems to be on the fence about something haha no but seriously someone help her. 13/10 https://t.co/BxvuXk0UCm | NaN | None | None | None | puppo | 13.0 |
22 | 887517139158093824 | 2017-07-19 03:39:09 +0000 | I've yet to rate a Venezuelan Hover Wiener. This is such an honor. 14/10 paw-inspiring af (IG: roxy.thedoxy) https://t.co/20VrLAA8ba | NaN | None | None | None | None | 14.0 |
24 | 887343217045368832 | 2017-07-18 16:08:03 +0000 | You may not have known you needed to see this today. 13/10 please enjoy (IG: emmylouroo) https://t.co/WZqNqygEyV | NaN | None | None | None | None | 13.0 |
25 | 887101392804085760 | 2017-07-18 00:07:08 +0000 | This... is a Jubilant Antarctic House Bear. We only rate dogs. Please only send dogs. Thank you... 12/10 would suffocate in floof https://t.co/4Ad1jzJSdp | NaN | None | None | None | None | 12.0 |
37 | 885167619883638784 | 2017-07-12 16:03:00 +0000 | Here we have a corgi undercover as a malamute. Pawbably doing important investigative work. Zero control over tongue happenings. 13/10 https://t.co/44ItaMubBf | NaN | None | None | None | None | 13.0 |
41 | 884441805382717440 | 2017-07-10 15:58:53 +0000 | I present to you, Pup in Hat. Pup in Hat is great for all occasions. Extremely versatile. Compact as h*ck. 14/10 (IG: itselizabethgales) https://t.co/vvBOcC2VdC | NaN | None | None | None | None | 14.0 |
42 | 884247878851493888 | 2017-07-10 03:08:17 +0000 | OMG HE DIDN'T MEAN TO HE WAS JUST TRYING A LITTLE BARKOUR HE'S SUPER SORRY 13/10 WOULD FORGIVE IMMEDIATE https://t.co/uF3pQ8Wubj | NaN | None | None | None | None | 13.0 |
47 | 883117836046086144 | 2017-07-07 00:17:54 +0000 | Please only send dogs. We don't rate mechanics, no matter how h*ckin good. Thank you... 13/10 would sneak a pat https://t.co/Se5fZ9wp5E | NaN | None | None | None | None | 13.0 |
56 | 881536004380872706 | 2017-07-02 15:32:16 +0000 | Here is a pupper approaching maximum borkdrive. Zooming at never before seen speeds. 14/10 paw-inspiring af \n(IG: puffie_the_chow) https://t.co/ghXBIIeQZF | NaN | None | None | pupper | None | 14.0 |
59 | 880872448815771648 | 2017-06-30 19:35:32 +0000 | Ugh not again. We only rate dogs. Please don't send in well-dressed floppy-tongued street penguins. Dogs only please. Thank you... 12/10 https://t.co/WiAMbTkDPf | NaN | None | None | None | None | 12.0 |
62 | 880095782870896641 | 2017-06-28 16:09:20 +0000 | Please don't send in photos without dogs in them. We're not @porch_rates. Insubordinate and churlish. Pretty good porch tho 11/10 https://t.co/HauE8M3Bu4 | NaN | None | None | None | None | 11.0 |
72 | 878604707211726852 | 2017-06-24 13:24:20 +0000 | Martha is stunning how h*ckin dare you. 13/10 https://t.co/9uABQXgjwa | NaN | None | None | None | None | 13.0 |
83 | 876537666061221889 | 2017-06-18 20:30:39 +0000 | I can say with the pupmost confidence that the doggos who assisted with this search are heroic as h*ck. 14/10 for all https://t.co/8yoc1CNTsu | NaN | None | None | None | None | 14.0 |
88 | 875097192612077568 | 2017-06-14 21:06:43 +0000 | You'll get your package when that precious man is done appreciating the pups. 13/10 for everyone https://t.co/PFp4MghzBW | NaN | None | None | None | None | 13.0 |
89 | 875021211251597312 | 2017-06-14 16:04:48 +0000 | Guys please stop sending pictures without any dogs in th- oh never mind hello excuse me sir. 12/10 stealthy as h*ck https://t.co/brCQoqc8AW | NaN | None | None | None | None | 12.0 |
93 | 874057562936811520 | 2017-06-12 00:15:36 +0000 | I can't believe this keeps happening. This, is a birb taking a bath. We only rate dogs. Please only send dogs. Thank you... 12/10 https://t.co/pwY9PQhtP2 | NaN | None | None | None | None | 12.0 |
96 | 873580283840344065 | 2017-06-10 16:39:04 +0000 | We usually don't rate Deck-bound Saskatoon Black Bears, but this one is h*ckin flawless. Sneaky tongue slip too. 13/10 would hug firmly https://t.co/mNuMH9400n | NaN | None | None | None | None | 13.0 |
99 | 872967104147763200 | 2017-06-09 00:02:31 +0000 | Here's a very large dog. He has a date later. Politely asked this water person to check if his breath is bad. 12/10 good to go doggo https://t.co/EMYIdoblMR | NaN | doggo | None | None | None | 12.0 |
100 | 872820683541237760 | 2017-06-08 14:20:41 +0000 | Here are my favorite #dogsatpollingstations \nMost voted for a more consistent walking schedule and to increase daily pats tenfold. All 13/10 https://t.co/17FVMl4VZ5 | NaN | None | None | None | None | 13.0 |
103 | 872486979161796608 | 2017-06-07 16:14:40 +0000 | We. Only. Rate. Dogs. Do not send in other things like this fluffy floor shark clearly ready to attack. Get it together guys... 12/10 https://t.co/BZHiKx3FpQ | NaN | None | None | None | None | 12.0 |
110 | 871102520638267392 | 2017-06-03 20:33:19 +0000 | Never doubt a doggo 14/10 https://t.co/AbBLh2FZCH | NaN | doggo | None | None | None | 14.0 |
112 | 870804317367881728 | 2017-06-03 00:48:22 +0000 | Real funny guys. Sending in a pic without a dog in it. Hilarious. We'll rate the rug tho because it's giving off a very good vibe. 11/10 https://t.co/GCD1JccCyi | NaN | None | None | None | None | 11.0 |
125 | 868622495443632128 | 2017-05-28 00:18:35 +0000 | Here's a h*ckin peaceful boy. Unbothered by the comings and goings. 13/10 please reveal your wise ways https://t.co/yeaH8Ej5eM | NaN | None | None | None | None | 13.0 |
127 | 867900495410671616 | 2017-05-26 00:29:37 +0000 | Unbelievable. We only rate dogs. Please don't send in non-canines like the "I" from Pixar's opening credits. Thank you... 12/10 https://t.co/JMhDNv5wXZ | NaN | None | None | None | None | 12.0 |
131 | 867051520902168576 | 2017-05-23 16:16:06 +0000 | Oh my this spooked me up. We only rate dogs, not happy ghosts. Please send dogs only. It's a very simple premise. Thank you... 13/10 https://t.co/M5Rz0R8SIQ | NaN | None | None | None | None | 13.0 |
133 | 866720684873056260 | 2017-05-22 18:21:28 +0000 | He was providing for his family 13/10 how dare you https://t.co/Q8mVwWN3f4 | NaN | None | None | None | None | 13.0 |
141 | 864873206498414592 | 2017-05-17 16:00:15 +0000 | We only rate dogs. Please don't send in Jesus. We're trying to remain professional and legitimate. Thank you... 14/10 https://t.co/wr3xsjeCIR | NaN | None | None | None | None | 14.0 |
149 | 863079547188785154 | 2017-05-12 17:12:53 +0000 | Ladies and gentlemen... I found Pipsy. He may have changed his name to Pablo, but he never changed his love for the sea. Pupgraded to 14/10 https://t.co/lVU5GyNFen | NaN | None | None | None | None | 14.0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
2326 | 666411507551481857 | 2015-11-17 00:24:19 +0000 | This is quite the dog. Gets really excited when not in water. Not very soft tho. Bad at fetch. Can't do tricks. 2/10 https://t.co/aMCTNWO94t | NaN | None | None | None | None | 2.0 |
2327 | 666407126856765440 | 2015-11-17 00:06:54 +0000 | This is a southern Vesuvius bumblegruff. Can drive a truck (wow). Made friends with 5 other nifty dogs (neat). 7/10 https://t.co/LopTBkKa8h | NaN | None | None | None | None | 7.0 |
2328 | 666396247373291520 | 2015-11-16 23:23:41 +0000 | Oh goodness. A super rare northeast Qdoba kangaroo mix. Massive feet. No pouch (disappointing). Seems alert. 9/10 https://t.co/Dc7b0E8qFE | NaN | None | None | None | None | 9.0 |
2329 | 666373753744588802 | 2015-11-16 21:54:18 +0000 | Those are sunglasses and a jean jacket. 11/10 dog cool af https://t.co/uHXrPkUEyl | NaN | None | None | None | None | 11.0 |
2330 | 666362758909284353 | 2015-11-16 21:10:36 +0000 | Unique dog here. Very small. Lives in container of Frosted Flakes (?). Short legs. Must be rare 6/10 would still pet https://t.co/XMD9CwjEnM | NaN | None | None | None | None | 6.0 |
2331 | 666353288456101888 | 2015-11-16 20:32:58 +0000 | Here we have a mixed Asiago from the Galápagos Islands. Only one ear working. Big fan of marijuana carpet. 8/10 https://t.co/tltQ5w9aUO | NaN | None | None | None | None | 8.0 |
2332 | 666345417576210432 | 2015-11-16 20:01:42 +0000 | Look at this jokester thinking seat belt laws don't apply to him. Great tongue tho 10/10 https://t.co/VFKG1vxGjB | NaN | None | None | None | None | 10.0 |
2333 | 666337882303524864 | 2015-11-16 19:31:45 +0000 | This is an extremely rare horned Parthenon. Not amused. Wears shoes. Overall very nice. 9/10 would pet aggressively https://t.co/QpRjllzWAL | NaN | None | None | None | None | 9.0 |
2334 | 666293911632134144 | 2015-11-16 16:37:02 +0000 | This is a funny dog. Weird toes. Won't come down. Loves branch. Refuses to eat his food. Hard to cuddle with. 3/10 https://t.co/IIXis0zta0 | NaN | None | None | None | None | 3.0 |
2335 | 666287406224695296 | 2015-11-16 16:11:11 +0000 | This is an Albanian 3 1/2 legged Episcopalian. Loves well-polished hardwood flooring. Penis on the collar. 9/10 https://t.co/d9NcXFKwLv | NaN | None | None | None | None | 9.0 |
2336 | 666273097616637952 | 2015-11-16 15:14:19 +0000 | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | None | None | None | None | 11.0 |
2337 | 666268910803644416 | 2015-11-16 14:57:41 +0000 | Very concerned about fellow dog trapped in computer. 10/10 https://t.co/0yxApIikpk | NaN | None | None | None | None | 10.0 |
2338 | 666104133288665088 | 2015-11-16 04:02:55 +0000 | Not familiar with this breed. No tail (weird). Only 2 legs. Doesn't bark. Surprisingly quick. Shits eggs. 1/10 https://t.co/Asgdc6kuLX | NaN | None | None | None | None | 1.0 |
2339 | 666102155909144576 | 2015-11-16 03:55:04 +0000 | Oh my. Here you are seeing an Adobe Setter giving birth to twins!!! The world is an amazing place. 11/10 https://t.co/11LvqN4WLq | NaN | None | None | None | None | 11.0 |
2340 | 666099513787052032 | 2015-11-16 03:44:34 +0000 | Can stand on stump for what seems like a while. Built that birdhouse? Impressive. Made friends with a squirrel. 8/10 https://t.co/Ri4nMTLq5C | NaN | None | None | None | None | 8.0 |
2341 | 666094000022159362 | 2015-11-16 03:22:39 +0000 | This appears to be a Mongolian Presbyterian mix. Very tired. Tongue slip confirmed. 9/10 would lie down with https://t.co/mnioXo3IfP | NaN | None | None | None | None | 9.0 |
2342 | 666082916733198337 | 2015-11-16 02:38:37 +0000 | Here we have a well-established sunblockerspaniel. Lost his other flip-flop. 6/10 not very waterproof https://t.co/3RU6x0vHB7 | NaN | None | None | None | None | 6.0 |
2343 | 666073100786774016 | 2015-11-16 01:59:36 +0000 | Let's hope this flight isn't Malaysian (lol). What a dog! Almost completely camouflaged. 10/10 I trust this pilot https://t.co/Yk6GHE9tOY | NaN | None | None | None | None | 10.0 |
2344 | 666071193221509120 | 2015-11-16 01:52:02 +0000 | Here we have a northern speckled Rhododendron. Much sass. Gives 0 fucks. Good tongue. 9/10 would caress sensually https://t.co/ZoL8kq2XFx | NaN | None | None | None | None | 9.0 |
2345 | 666063827256086533 | 2015-11-16 01:22:45 +0000 | This is the happiest dog you will ever see. Very committed owner. Nice couch. 10/10 https://t.co/RhUEAloehK | NaN | None | None | None | None | 10.0 |
2346 | 666058600524156928 | 2015-11-16 01:01:59 +0000 | Here is the Rand Paul of retrievers folks! He's probably good at poker. Can drink beer (lol rad). 8/10 good dog https://t.co/pYAJkAe76p | NaN | None | None | None | None | 8.0 |
2347 | 666057090499244032 | 2015-11-16 00:55:59 +0000 | My oh my. This is a rare blond Canadian terrier on wheels. Only $8.98. Rather docile. 9/10 very rare https://t.co/yWBqbrzy8O | NaN | None | None | None | None | 9.0 |
2348 | 666055525042405380 | 2015-11-16 00:49:46 +0000 | Here is a Siberian heavily armored polar bear mix. Strong owner. 10/10 I would do unspeakable things to pet this dog https://t.co/rdivxLiqEt | NaN | None | None | None | None | 10.0 |
2349 | 666051853826850816 | 2015-11-16 00:35:11 +0000 | This is an odd dog. Hard on the outside but loving on the inside. Petting still fun. Doesn't play catch well. 2/10 https://t.co/v5A4vzSDdc | NaN | None | None | None | None | 2.0 |
2350 | 666050758794694657 | 2015-11-16 00:30:50 +0000 | This is a truly beautiful English Wilson Staff retriever. Has a nice phone. Privileged. 10/10 would trade lives with https://t.co/fvIbQfHjIe | NaN | None | None | None | None | 10.0 |
2351 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq | NaN | None | None | None | None | 5.0 |
2352 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx | NaN | None | None | None | None | 6.0 |
2353 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | Here is a very happy pup. Big fan of well-maintained decks. Just look at that tongue. 9/10 would cuddle af https://t.co/y671yMhoiR | NaN | None | None | None | None | 9.0 |
2354 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI | NaN | None | None | None | None | 7.0 |
2355 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | Here we have a Japanese Irish Setter. Lost eye in Vietnam (?). Big fan of relaxing on stair. 8/10 would pet https://t.co/BLDqew2Ijj | NaN | None | None | None | None | 8.0 |
694 rows × 9 columns
# Frequency of each cleaned dog name.
twitter_archive_enhanced_clean["name"].value_counts()
Charlie 11
Cooper 10
Lucy 10
Oliver 9
Tucker 9
Winston 8
Lola 8
Penny 8
Daisy 7
Toby 7
Sadie 6
Koda 6
Bailey 6
Bella 6
Stanley 6
Bo 5
Scout 5
Louis 5
Leo 5
Buddy 5
Jax 5
Dave 5
Rusty 5
Oscar 5
Boomer 4
Chester 4
Sophie 4
Dexter 4
George 4
Winnie 4
..
Obi 1
Ester 1
Dallas 1
Beau & Wilbur 1
Moofasa 1
Dudley 1
Hercules 1
Petrick 1
Yoda 1
Rooney 1
Fabio 1
Balto 1
Birf 1
Cheryl AKA Queen Pupper of the Skies 1
Huck 1
Antony 1
Stephanus 1
Lassie 1
Howard 1
Striker 1
Cali 1
Marvin 1
Perry 1
Cleopatricia 1
Siba 1
Opie&Clarkus 1
Pete 1
Boston 1
Deacon 1
Alf 1
Name: name, Length: 990, dtype: int64
# Spot-check 15 randomly chosen names.
twitter_archive_enhanced_clean["name"].sample(n=15)
1456 Colin
1142 NaN
279 Sojourner
1782 NaN
381 Ralphie
122 Gizmo
898 Lilli Bee & Honey Bear
873 Bruce
2003 Buddy
1489 Wally
2082 Sage
1569 Trooper & Maya
324 Lipton
717 Loomis
2163 Billl
Name: name, dtype: object
# Show every tweet whose text mentions "Zeke" to verify the manual name fix.
# The comparison with True keeps rows with a missing match result excluded.
mentions_zeke = twitter_archive_enhanced_clean["text"].str.contains("Zeke") == True
twitter_archive_enhanced_clean[mentions_zeke]
tweet_id | timestamp | text | name | doggo | floofer | pupper | puppo | rating | |
---|---|---|---|---|---|---|---|---|---|
17 | 888804989199671297 | 2017-07-22 16:56:37 +0000 | This is Zeke. He has a new stick. Very proud of it. Would like you to throw it for him without taking it. 13/10 would do my best https://t.co/HTQ77yNQ5K | Zeke | None | None | None | None | 13.0 |
181 | 857029823797047296 | 2017-04-26 00:33:27 +0000 | This is Zeke. He performs group cheeky wink tutorials. Pawfect execution here. 12/10 would wink back https://t.co/uMH5CLjXJu | Zeke | None | None | None | None | 12.0 |
547 | 805520635690676224 | 2016-12-04 21:14:20 +0000 | This is Zeke the Wonder Dog. He never let that poor man keep his frisbees. One of the Spartans all time greatest receivers. 13/10 RIP Zeke https://t.co/zacX7S6GyJ | Zeke | None | None | None | None | 13.0 |
twitter_archive_enhanced
:狗狗地位数据缺失,需要重新提取:
方法同上,通过str.findall
函数使用正则表达式从text
列中提取狗狗地位
# Collect every dog-stage keyword found in the lower-cased tweet text;
# each row receives a (possibly empty) list of matches.
twitter_archive_enhanced_clean["stage"] = (
    twitter_archive_enhanced_clean["text"]
    .str.lower()
    .str.findall('doggo|floofer|pupper|puppo')
)
twitter_archive_enhanced_clean["stage"]
0 []
1 []
2 []
3 []
4 []
5 []
6 []
7 []
8 []
9 [doggo]
10 []
11 []
12 [puppo]
13 []
14 [puppo]
15 []
16 []
17 []
18 []
20 []
21 []
22 []
23 []
24 []
25 []
26 []
27 []
28 []
29 [pupper]
31 []
...
2326 []
2327 []
2328 []
2329 []
2330 []
2331 []
2332 []
2333 []
2334 []
2335 []
2336 []
2337 []
2338 []
2339 []
2340 []
2341 []
2342 []
2343 []
2344 []
2345 []
2346 []
2347 []
2348 []
2349 []
2350 []
2351 []
2352 []
2353 []
2354 []
2355 []
Name: stage, Length: 2117, dtype: object
# Print every match list that holds more than one stage keyword, to see which
# tweets mention several stages or repeat the same one.
for matched_stages in twitter_archive_enhanced_clean["stage"]:
    if len(matched_stages) > 1:
        print(matched_stages)
['puppo', 'doggo', 'puppo']
['puppo', 'doggo']
['doggo', 'floofer']
['doggo', 'doggo']
['pupper', 'doggo']
['pupper', 'doggo', 'pupper', 'doggo']
['doggo', 'pupper']
['doggo', 'pupper']
['pupper', 'pupper']
['doggo', 'pupper']
['pupper', 'doggo']
['doggo', 'doggo']
['doggo', 'pupper']
['doggo', 'pupper']
['pupper', 'pupper']
['pupper', 'doggo']
['doggo', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper']
['pupper', 'pupper', 'pupper']
# Collapse each match list to its distinct stage names, joined by commas.
# sorted() pins the order: iterating a bare set of strings follows hash order,
# so without it the same pair could be labelled "doggo,pupper" in one run and
# "pupper,doggo" in another, splitting one category into two.
twitter_archive_enhanced_clean["stage"] = twitter_archive_enhanced_clean["stage"].apply(
    lambda x: ','.join(sorted(set(x)))
)
# Count how many tweets fall into each stage label (empty string = no keyword).
twitter_archive_enhanced_clean["stage"].value_counts()
1740
pupper 248
doggo 79
puppo 28
doggo,pupper 10
floofer 9
doggo,puppo 2
doggo,floofer 1
Name: stage, dtype: int64
# Drop the "doggo", "floofer", "pupper" and "puppo" columns in place.
twitter_archive_enhanced_clean.drop(
    labels=["doggo","floofer","pupper","puppo"], axis="columns", inplace=True
)
# Replace empty-string stage values with NaN so they register as missing.
stage_without_blanks = twitter_archive_enhanced_clean["stage"].replace("", np.nan)
twitter_archive_enhanced_clean["stage"] = stage_without_blanks
# Show the rows whose stage is missing.
twitter_archive_enhanced_clean[pd.isnull(twitter_archive_enhanced_clean["stage"])]
tweet_id | timestamp | text | name | rating | stage | |
---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | Phineas | 13.0 | NaN |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | Tilly | 13.0 | NaN |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | Archie | 12.0 | NaN |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | Darla | 13.0 | NaN |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | Franklin | 12.0 | NaN |
5 | 891087950875897856 | 2017-07-29 00:08:17 +0000 | Here we have a majestic great white breaching off South Africa's coast. Absolutely h*ckin breathtaking. 13/10 (IG: tucker_marlo) #BarkWeek https://t.co/kQ04fDDRmh | NaN | 13.0 | NaN |
6 | 890971913173991426 | 2017-07-28 16:27:12 +0000 | Meet Jax. He enjoys ice cream so much he gets nervous around it. 13/10 help Jax enjoy more things by clicking below\n\nhttps://t.co/Zr4hWfAs1H https://t.co/tVJBRMnhxl | Jax | 13.0 | NaN |
7 | 890729181411237888 | 2017-07-28 00:22:40 +0000 | When you watch your owner call another dog a good boy but then they turn back to you and say you're a great boy. 13/10 https://t.co/v0nONBcwxq | NaN | 13.0 | NaN |
8 | 890609185150312448 | 2017-07-27 16:25:51 +0000 | This is Zoey. She doesn't want to be one of the scary sharks. Just wants to be a snuggly pettable boatpet. 13/10 #BarkWeek https://t.co/9TwLuAGH0b | Zoey | 13.0 | NaN |
10 | 890006608113172480 | 2017-07-26 00:31:25 +0000 | This is Koda. He is a South Australian deckshark. Deceptively deadly. Frighteningly majestic. 13/10 would risk a petting #BarkWeek https://t.co/dVPW0B0Mme | Koda | 13.0 | NaN |
11 | 889880896479866881 | 2017-07-25 16:11:53 +0000 | This is Bruno. He is a service shark. Only gets out of the water to assist you. 13/10 terrifyingly good boy https://t.co/u1XPQMl29g | Bruno | 13.0 | NaN |
13 | 889638837579907072 | 2017-07-25 00:10:02 +0000 | This is Ted. He does his best. Sometimes that's not enough. But it's ok. 12/10 would assist https://t.co/f8dEDcrKSR | Ted | 12.0 | NaN |
15 | 889278841981685760 | 2017-07-24 00:19:32 +0000 | This is Oliver. You're witnessing one of his many brutal attacks. Seems to be playing with his victim. 13/10 fr*ckin frightening #BarkWeek https://t.co/WpHvrQedPb | Oliver | 13.0 | NaN |
16 | 888917238123831296 | 2017-07-23 00:22:39 +0000 | This is Jim. He found a fren. Taught him how to sit like the good boys. 12/10 for both https://t.co/chxruIOUJN | Jim | 12.0 | NaN |
17 | 888804989199671297 | 2017-07-22 16:56:37 +0000 | This is Zeke. He has a new stick. Very proud of it. Would like you to throw it for him without taking it. 13/10 would do my best https://t.co/HTQ77yNQ5K | Zeke | 13.0 | NaN |
18 | 888554962724278272 | 2017-07-22 00:23:06 +0000 | This is Ralphus. He's powering up. Attempting maximum borkdrive. 13/10 inspirational af https://t.co/YnYAFCTTiK | Ralphus | 13.0 | NaN |
20 | 888078434458587136 | 2017-07-20 16:49:33 +0000 | This is Gerald. He was just told he didn't get the job he interviewed for. A h*ckin injustice. 12/10 didn't want the job anyway https://t.co/DK7iDPfuRX | Gerald | 12.0 | NaN |
21 | 887705289381826560 | 2017-07-19 16:06:48 +0000 | This is Jeffrey. He has a monopoly on the pool noodles. Currently running a 'boop for two' midweek sale. 13/10 h*ckin strategic https://t.co/PhrUk20Q64 | Jeffrey | 13.0 | NaN |
22 | 887517139158093824 | 2017-07-19 03:39:09 +0000 | I've yet to rate a Venezuelan Hover Wiener. This is such an honor. 14/10 paw-inspiring af (IG: roxy.thedoxy) https://t.co/20VrLAA8ba | NaN | 14.0 | NaN |
23 | 887473957103951883 | 2017-07-19 00:47:34 +0000 | This is Canela. She attempted some fancy porch pics. They were unsuccessful. 13/10 someone help her https://t.co/cLyzpcUcMX | Canela | 13.0 | NaN |
24 | 887343217045368832 | 2017-07-18 16:08:03 +0000 | You may not have known you needed to see this today. 13/10 please enjoy (IG: emmylouroo) https://t.co/WZqNqygEyV | NaN | 13.0 | NaN |
25 | 887101392804085760 | 2017-07-18 00:07:08 +0000 | This... is a Jubilant Antarctic House Bear. We only rate dogs. Please only send dogs. Thank you... 12/10 would suffocate in floof https://t.co/4Ad1jzJSdp | NaN | 12.0 | NaN |
26 | 886983233522544640 | 2017-07-17 16:17:36 +0000 | This is Maya. She's very shy. Rarely leaves her cup. 13/10 would find her an environment to thrive in https://t.co/I6oNy0CgiT | Maya | 13.0 | NaN |
27 | 886736880519319552 | 2017-07-16 23:58:41 +0000 | This is Mingus. He's a wonderful father to his smol pup. Confirmed 13/10, but he needs your help\n\nhttps://t.co/bVi0Yr4Cff https://t.co/ISvKOSkd5b | Mingus | 13.0 | NaN |
28 | 886680336477933568 | 2017-07-16 20:14:00 +0000 | This is Derek. He's late for a dog meeting. 13/10 pet...al to the metal https://t.co/BCoWue0abA | Derek | 13.0 | NaN |
31 | 886258384151887873 | 2017-07-15 16:17:19 +0000 | This is Waffles. His doggles are pupside down. Unsure how to fix. 13/10 someone assist Waffles https://t.co/xZDA9Qsq1O | Waffles | 13.0 | NaN |
33 | 885984800019947520 | 2017-07-14 22:10:11 +0000 | Viewer discretion advised. This is Jimbo. He will rip ur finger right h*ckin off. Other dog clearly an accessory. 12/10 pls pet with caution https://t.co/BuveP0uMF1 | Jimbo | 12.0 | NaN |
34 | 885528943205470208 | 2017-07-13 15:58:47 +0000 | This is Maisey. She fell asleep mid-excavation. Happens to the best of us. 13/10 would pat noggin approvingly https://t.co/tp1kQ8i9JF | Maisey | 13.0 | NaN |
35 | 885518971528720385 | 2017-07-13 15:19:09 +0000 | I have a new hero and his name is Howard. 14/10 https://t.co/gzLHboL7Sk | Howard | 14.0 | NaN |
37 | 885167619883638784 | 2017-07-12 16:03:00 +0000 | Here we have a corgi undercover as a malamute. Pawbably doing important investigative work. Zero control over tongue happenings. 13/10 https://t.co/44ItaMubBf | NaN | 13.0 | NaN |
... | ... | ... | ... | ... | ... | ... |
2326 | 666411507551481857 | 2015-11-17 00:24:19 +0000 | This is quite the dog. Gets really excited when not in water. Not very soft tho. Bad at fetch. Can't do tricks. 2/10 https://t.co/aMCTNWO94t | NaN | 2.0 | NaN |
2327 | 666407126856765440 | 2015-11-17 00:06:54 +0000 | This is a southern Vesuvius bumblegruff. Can drive a truck (wow). Made friends with 5 other nifty dogs (neat). 7/10 https://t.co/LopTBkKa8h | NaN | 7.0 | NaN |
2328 | 666396247373291520 | 2015-11-16 23:23:41 +0000 | Oh goodness. A super rare northeast Qdoba kangaroo mix. Massive feet. No pouch (disappointing). Seems alert. 9/10 https://t.co/Dc7b0E8qFE | NaN | 9.0 | NaN |
2329 | 666373753744588802 | 2015-11-16 21:54:18 +0000 | Those are sunglasses and a jean jacket. 11/10 dog cool af https://t.co/uHXrPkUEyl | NaN | 11.0 | NaN |
2330 | 666362758909284353 | 2015-11-16 21:10:36 +0000 | Unique dog here. Very small. Lives in container of Frosted Flakes (?). Short legs. Must be rare 6/10 would still pet https://t.co/XMD9CwjEnM | NaN | 6.0 | NaN |
2331 | 666353288456101888 | 2015-11-16 20:32:58 +0000 | Here we have a mixed Asiago from the Galápagos Islands. Only one ear working. Big fan of marijuana carpet. 8/10 https://t.co/tltQ5w9aUO | NaN | 8.0 | NaN |
2332 | 666345417576210432 | 2015-11-16 20:01:42 +0000 | Look at this jokester thinking seat belt laws don't apply to him. Great tongue tho 10/10 https://t.co/VFKG1vxGjB | NaN | 10.0 | NaN |
2333 | 666337882303524864 | 2015-11-16 19:31:45 +0000 | This is an extremely rare horned Parthenon. Not amused. Wears shoes. Overall very nice. 9/10 would pet aggressively https://t.co/QpRjllzWAL | NaN | 9.0 | NaN |
2334 | 666293911632134144 | 2015-11-16 16:37:02 +0000 | This is a funny dog. Weird toes. Won't come down. Loves branch. Refuses to eat his food. Hard to cuddle with. 3/10 https://t.co/IIXis0zta0 | NaN | 3.0 | NaN |
2335 | 666287406224695296 | 2015-11-16 16:11:11 +0000 | This is an Albanian 3 1/2 legged Episcopalian. Loves well-polished hardwood flooring. Penis on the collar. 9/10 https://t.co/d9NcXFKwLv | NaN | 9.0 | NaN |
2336 | 666273097616637952 | 2015-11-16 15:14:19 +0000 | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | 11.0 | NaN |
2337 | 666268910803644416 | 2015-11-16 14:57:41 +0000 | Very concerned about fellow dog trapped in computer. 10/10 https://t.co/0yxApIikpk | NaN | 10.0 | NaN |
2338 | 666104133288665088 | 2015-11-16 04:02:55 +0000 | Not familiar with this breed. No tail (weird). Only 2 legs. Doesn't bark. Surprisingly quick. Shits eggs. 1/10 https://t.co/Asgdc6kuLX | NaN | 1.0 | NaN |
2339 | 666102155909144576 | 2015-11-16 03:55:04 +0000 | Oh my. Here you are seeing an Adobe Setter giving birth to twins!!! The world is an amazing place. 11/10 https://t.co/11LvqN4WLq | NaN | 11.0 | NaN |
2340 | 666099513787052032 | 2015-11-16 03:44:34 +0000 | Can stand on stump for what seems like a while. Built that birdhouse? Impressive. Made friends with a squirrel. 8/10 https://t.co/Ri4nMTLq5C | NaN | 8.0 | NaN |
2341 | 666094000022159362 | 2015-11-16 03:22:39 +0000 | This appears to be a Mongolian Presbyterian mix. Very tired. Tongue slip confirmed. 9/10 would lie down with https://t.co/mnioXo3IfP | NaN | 9.0 | NaN |
2342 | 666082916733198337 | 2015-11-16 02:38:37 +0000 | Here we have a well-established sunblockerspaniel. Lost his other flip-flop. 6/10 not very waterproof https://t.co/3RU6x0vHB7 | NaN | 6.0 | NaN |
2343 | 666073100786774016 | 2015-11-16 01:59:36 +0000 | Let's hope this flight isn't Malaysian (lol). What a dog! Almost completely camouflaged. 10/10 I trust this pilot https://t.co/Yk6GHE9tOY | NaN | 10.0 | NaN |
2344 | 666071193221509120 | 2015-11-16 01:52:02 +0000 | Here we have a northern speckled Rhododendron. Much sass. Gives 0 fucks. Good tongue. 9/10 would caress sensually https://t.co/ZoL8kq2XFx | NaN | 9.0 | NaN |
2345 | 666063827256086533 | 2015-11-16 01:22:45 +0000 | This is the happiest dog you will ever see. Very committed owner. Nice couch. 10/10 https://t.co/RhUEAloehK | NaN | 10.0 | NaN |
2346 | 666058600524156928 | 2015-11-16 01:01:59 +0000 | Here is the Rand Paul of retrievers folks! He's probably good at poker. Can drink beer (lol rad). 8/10 good dog https://t.co/pYAJkAe76p | NaN | 8.0 | NaN |
2347 | 666057090499244032 | 2015-11-16 00:55:59 +0000 | My oh my. This is a rare blond Canadian terrier on wheels. Only $8.98. Rather docile. 9/10 very rare https://t.co/yWBqbrzy8O | NaN | 9.0 | NaN |
2348 | 666055525042405380 | 2015-11-16 00:49:46 +0000 | Here is a Siberian heavily armored polar bear mix. Strong owner. 10/10 I would do unspeakable things to pet this dog https://t.co/rdivxLiqEt | NaN | 10.0 | NaN |
2349 | 666051853826850816 | 2015-11-16 00:35:11 +0000 | This is an odd dog. Hard on the outside but loving on the inside. Petting still fun. Doesn't play catch well. 2/10 https://t.co/v5A4vzSDdc | NaN | 2.0 | NaN |
2350 | 666050758794694657 | 2015-11-16 00:30:50 +0000 | This is a truly beautiful English Wilson Staff retriever. Has a nice phone. Privileged. 10/10 would trade lives with https://t.co/fvIbQfHjIe | NaN | 10.0 | NaN |
2351 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq | NaN | 5.0 | NaN |
2352 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx | NaN | 6.0 | NaN |
2353 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | Here is a very happy pup. Big fan of well-maintained decks. Just look at that tongue. 9/10 would cuddle af https://t.co/y671yMhoiR | NaN | 9.0 | NaN |
2354 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI | NaN | 7.0 | NaN |
2355 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | Here we have a Japanese Irish Setter. Lost eye in Vietnam (?). Big fan of relaxing on stair. 8/10 would pet https://t.co/BLDqew2Ijj | NaN | 8.0 | NaN |
1740 rows × 6 columns
# Display the twitter_archive_enhanced_clean table.
twitter_archive_enhanced_clean
tweet_id | timestamp | text | name | rating | stage | |
---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | Phineas | 13.0 | NaN |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | Tilly | 13.0 | NaN |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | Archie | 12.0 | NaN |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | Darla | 13.0 | NaN |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | Franklin | 12.0 | NaN |
5 | 891087950875897856 | 2017-07-29 00:08:17 +0000 | Here we have a majestic great white breaching off South Africa's coast. Absolutely h*ckin breathtaking. 13/10 (IG: tucker_marlo) #BarkWeek https://t.co/kQ04fDDRmh | NaN | 13.0 | NaN |
6 | 890971913173991426 | 2017-07-28 16:27:12 +0000 | Meet Jax. He enjoys ice cream so much he gets nervous around it. 13/10 help Jax enjoy more things by clicking below\n\nhttps://t.co/Zr4hWfAs1H https://t.co/tVJBRMnhxl | Jax | 13.0 | NaN |
7 | 890729181411237888 | 2017-07-28 00:22:40 +0000 | When you watch your owner call another dog a good boy but then they turn back to you and say you're a great boy. 13/10 https://t.co/v0nONBcwxq | NaN | 13.0 | NaN |
8 | 890609185150312448 | 2017-07-27 16:25:51 +0000 | This is Zoey. She doesn't want to be one of the scary sharks. Just wants to be a snuggly pettable boatpet. 13/10 #BarkWeek https://t.co/9TwLuAGH0b | Zoey | 13.0 | NaN |
9 | 890240255349198849 | 2017-07-26 15:59:51 +0000 | This is Cassie. She is a college pup. Studying international doggo communication and stick theory. 14/10 so elegant much sophisticate https://t.co/t1bfwz5S2A | Cassie | 14.0 | doggo |
10 | 890006608113172480 | 2017-07-26 00:31:25 +0000 | This is Koda. He is a South Australian deckshark. Deceptively deadly. Frighteningly majestic. 13/10 would risk a petting #BarkWeek https://t.co/dVPW0B0Mme | Koda | 13.0 | NaN |
11 | 889880896479866881 | 2017-07-25 16:11:53 +0000 | This is Bruno. He is a service shark. Only gets out of the water to assist you. 13/10 terrifyingly good boy https://t.co/u1XPQMl29g | Bruno | 13.0 | NaN |
12 | 889665388333682689 | 2017-07-25 01:55:32 +0000 | Here's a puppo that seems to be on the fence about something haha no but seriously someone help her. 13/10 https://t.co/BxvuXk0UCm | NaN | 13.0 | puppo |
13 | 889638837579907072 | 2017-07-25 00:10:02 +0000 | This is Ted. He does his best. Sometimes that's not enough. But it's ok. 12/10 would assist https://t.co/f8dEDcrKSR | Ted | 12.0 | NaN |
14 | 889531135344209921 | 2017-07-24 17:02:04 +0000 | This is Stuart. He's sporting his favorite fanny pack. Secretly filled with bones only. 13/10 puppared puppo #BarkWeek https://t.co/y70o6h3isq | Stuart | 13.0 | puppo |
15 | 889278841981685760 | 2017-07-24 00:19:32 +0000 | This is Oliver. You're witnessing one of his many brutal attacks. Seems to be playing with his victim. 13/10 fr*ckin frightening #BarkWeek https://t.co/WpHvrQedPb | Oliver | 13.0 | NaN |
16 | 888917238123831296 | 2017-07-23 00:22:39 +0000 | This is Jim. He found a fren. Taught him how to sit like the good boys. 12/10 for both https://t.co/chxruIOUJN | Jim | 12.0 | NaN |
17 | 888804989199671297 | 2017-07-22 16:56:37 +0000 | This is Zeke. He has a new stick. Very proud of it. Would like you to throw it for him without taking it. 13/10 would do my best https://t.co/HTQ77yNQ5K | Zeke | 13.0 | NaN |
18 | 888554962724278272 | 2017-07-22 00:23:06 +0000 | This is Ralphus. He's powering up. Attempting maximum borkdrive. 13/10 inspirational af https://t.co/YnYAFCTTiK | Ralphus | 13.0 | NaN |
20 | 888078434458587136 | 2017-07-20 16:49:33 +0000 | This is Gerald. He was just told he didn't get the job he interviewed for. A h*ckin injustice. 12/10 didn't want the job anyway https://t.co/DK7iDPfuRX | Gerald | 12.0 | NaN |
21 | 887705289381826560 | 2017-07-19 16:06:48 +0000 | This is Jeffrey. He has a monopoly on the pool noodles. Currently running a 'boop for two' midweek sale. 13/10 h*ckin strategic https://t.co/PhrUk20Q64 | Jeffrey | 13.0 | NaN |
22 | 887517139158093824 | 2017-07-19 03:39:09 +0000 | I've yet to rate a Venezuelan Hover Wiener. This is such an honor. 14/10 paw-inspiring af (IG: roxy.thedoxy) https://t.co/20VrLAA8ba | NaN | 14.0 | NaN |
23 | 887473957103951883 | 2017-07-19 00:47:34 +0000 | This is Canela. She attempted some fancy porch pics. They were unsuccessful. 13/10 someone help her https://t.co/cLyzpcUcMX | Canela | 13.0 | NaN |
24 | 887343217045368832 | 2017-07-18 16:08:03 +0000 | You may not have known you needed to see this today. 13/10 please enjoy (IG: emmylouroo) https://t.co/WZqNqygEyV | NaN | 13.0 | NaN |
25 | 887101392804085760 | 2017-07-18 00:07:08 +0000 | This... is a Jubilant Antarctic House Bear. We only rate dogs. Please only send dogs. Thank you... 12/10 would suffocate in floof https://t.co/4Ad1jzJSdp | NaN | 12.0 | NaN |
26 | 886983233522544640 | 2017-07-17 16:17:36 +0000 | This is Maya. She's very shy. Rarely leaves her cup. 13/10 would find her an environment to thrive in https://t.co/I6oNy0CgiT | Maya | 13.0 | NaN |
27 | 886736880519319552 | 2017-07-16 23:58:41 +0000 | This is Mingus. He's a wonderful father to his smol pup. Confirmed 13/10, but he needs your help\n\nhttps://t.co/bVi0Yr4Cff https://t.co/ISvKOSkd5b | Mingus | 13.0 | NaN |
28 | 886680336477933568 | 2017-07-16 20:14:00 +0000 | This is Derek. He's late for a dog meeting. 13/10 pet...al to the metal https://t.co/BCoWue0abA | Derek | 13.0 | NaN |
29 | 886366144734445568 | 2017-07-15 23:25:31 +0000 | This is Roscoe. Another pupper fallen victim to spontaneous tongue ejections. Get the BlepiPen immediate. 12/10 deep breaths Roscoe https://t.co/RGE08MIJox | Roscoe | 12.0 | pupper |
31 | 886258384151887873 | 2017-07-15 16:17:19 +0000 | This is Waffles. His doggles are pupside down. Unsure how to fix. 13/10 someone assist Waffles https://t.co/xZDA9Qsq1O | Waffles | 13.0 | NaN |
... | ... | ... | ... | ... | ... | ... |
2326 | 666411507551481857 | 2015-11-17 00:24:19 +0000 | This is quite the dog. Gets really excited when not in water. Not very soft tho. Bad at fetch. Can't do tricks. 2/10 https://t.co/aMCTNWO94t | NaN | 2.0 | NaN |
2327 | 666407126856765440 | 2015-11-17 00:06:54 +0000 | This is a southern Vesuvius bumblegruff. Can drive a truck (wow). Made friends with 5 other nifty dogs (neat). 7/10 https://t.co/LopTBkKa8h | NaN | 7.0 | NaN |
2328 | 666396247373291520 | 2015-11-16 23:23:41 +0000 | Oh goodness. A super rare northeast Qdoba kangaroo mix. Massive feet. No pouch (disappointing). Seems alert. 9/10 https://t.co/Dc7b0E8qFE | NaN | 9.0 | NaN |
2329 | 666373753744588802 | 2015-11-16 21:54:18 +0000 | Those are sunglasses and a jean jacket. 11/10 dog cool af https://t.co/uHXrPkUEyl | NaN | 11.0 | NaN |
2330 | 666362758909284353 | 2015-11-16 21:10:36 +0000 | Unique dog here. Very small. Lives in container of Frosted Flakes (?). Short legs. Must be rare 6/10 would still pet https://t.co/XMD9CwjEnM | NaN | 6.0 | NaN |
2331 | 666353288456101888 | 2015-11-16 20:32:58 +0000 | Here we have a mixed Asiago from the Galápagos Islands. Only one ear working. Big fan of marijuana carpet. 8/10 https://t.co/tltQ5w9aUO | NaN | 8.0 | NaN |
2332 | 666345417576210432 | 2015-11-16 20:01:42 +0000 | Look at this jokester thinking seat belt laws don't apply to him. Great tongue tho 10/10 https://t.co/VFKG1vxGjB | NaN | 10.0 | NaN |
2333 | 666337882303524864 | 2015-11-16 19:31:45 +0000 | This is an extremely rare horned Parthenon. Not amused. Wears shoes. Overall very nice. 9/10 would pet aggressively https://t.co/QpRjllzWAL | NaN | 9.0 | NaN |
2334 | 666293911632134144 | 2015-11-16 16:37:02 +0000 | This is a funny dog. Weird toes. Won't come down. Loves branch. Refuses to eat his food. Hard to cuddle with. 3/10 https://t.co/IIXis0zta0 | NaN | 3.0 | NaN |
2335 | 666287406224695296 | 2015-11-16 16:11:11 +0000 | This is an Albanian 3 1/2 legged Episcopalian. Loves well-polished hardwood flooring. Penis on the collar. 9/10 https://t.co/d9NcXFKwLv | NaN | 9.0 | NaN |
2336 | 666273097616637952 | 2015-11-16 15:14:19 +0000 | Can take selfies 11/10 https://t.co/ws2AMaNwPW | NaN | 11.0 | NaN |
2337 | 666268910803644416 | 2015-11-16 14:57:41 +0000 | Very concerned about fellow dog trapped in computer. 10/10 https://t.co/0yxApIikpk | NaN | 10.0 | NaN |
2338 | 666104133288665088 | 2015-11-16 04:02:55 +0000 | Not familiar with this breed. No tail (weird). Only 2 legs. Doesn't bark. Surprisingly quick. Shits eggs. 1/10 https://t.co/Asgdc6kuLX | NaN | 1.0 | NaN |
2339 | 666102155909144576 | 2015-11-16 03:55:04 +0000 | Oh my. Here you are seeing an Adobe Setter giving birth to twins!!! The world is an amazing place. 11/10 https://t.co/11LvqN4WLq | NaN | 11.0 | NaN |
2340 | 666099513787052032 | 2015-11-16 03:44:34 +0000 | Can stand on stump for what seems like a while. Built that birdhouse? Impressive. Made friends with a squirrel. 8/10 https://t.co/Ri4nMTLq5C | NaN | 8.0 | NaN |
2341 | 666094000022159362 | 2015-11-16 03:22:39 +0000 | This appears to be a Mongolian Presbyterian mix. Very tired. Tongue slip confirmed. 9/10 would lie down with https://t.co/mnioXo3IfP | NaN | 9.0 | NaN |
2342 | 666082916733198337 | 2015-11-16 02:38:37 +0000 | Here we have a well-established sunblockerspaniel. Lost his other flip-flop. 6/10 not very waterproof https://t.co/3RU6x0vHB7 | NaN | 6.0 | NaN |
2343 | 666073100786774016 | 2015-11-16 01:59:36 +0000 | Let's hope this flight isn't Malaysian (lol). What a dog! Almost completely camouflaged. 10/10 I trust this pilot https://t.co/Yk6GHE9tOY | NaN | 10.0 | NaN |
2344 | 666071193221509120 | 2015-11-16 01:52:02 +0000 | Here we have a northern speckled Rhododendron. Much sass. Gives 0 fucks. Good tongue. 9/10 would caress sensually https://t.co/ZoL8kq2XFx | NaN | 9.0 | NaN |
2345 | 666063827256086533 | 2015-11-16 01:22:45 +0000 | This is the happiest dog you will ever see. Very committed owner. Nice couch. 10/10 https://t.co/RhUEAloehK | NaN | 10.0 | NaN |
2346 | 666058600524156928 | 2015-11-16 01:01:59 +0000 | Here is the Rand Paul of retrievers folks! He's probably good at poker. Can drink beer (lol rad). 8/10 good dog https://t.co/pYAJkAe76p | NaN | 8.0 | NaN |
2347 | 666057090499244032 | 2015-11-16 00:55:59 +0000 | My oh my. This is a rare blond Canadian terrier on wheels. Only $8.98. Rather docile. 9/10 very rare https://t.co/yWBqbrzy8O | NaN | 9.0 | NaN |
2348 | 666055525042405380 | 2015-11-16 00:49:46 +0000 | Here is a Siberian heavily armored polar bear mix. Strong owner. 10/10 I would do unspeakable things to pet this dog https://t.co/rdivxLiqEt | NaN | 10.0 | NaN |
2349 | 666051853826850816 | 2015-11-16 00:35:11 +0000 | This is an odd dog. Hard on the outside but loving on the inside. Petting still fun. Doesn't play catch well. 2/10 https://t.co/v5A4vzSDdc | NaN | 2.0 | NaN |
2350 | 666050758794694657 | 2015-11-16 00:30:50 +0000 | This is a truly beautiful English Wilson Staff retriever. Has a nice phone. Privileged. 10/10 would trade lives with https://t.co/fvIbQfHjIe | NaN | 10.0 | NaN |
2351 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq | NaN | 5.0 | NaN |
2352 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx | NaN | 6.0 | NaN |
2353 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | Here is a very happy pup. Big fan of well-maintained decks. Just look at that tongue. 9/10 would cuddle af https://t.co/y671yMhoiR | NaN | 9.0 | NaN |
2354 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI | NaN | 7.0 | NaN |
2355 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | Here we have a Japanese Irish Setter. Lost eye in Vietnam (?). Big fan of relaxing on stair. 8/10 would pet https://t.co/BLDqew2Ijj | NaN | 8.0 | NaN |
2117 rows × 6 columns
`twitter_archive_enhanced`:`doggo`、`floofer`、`pupper`、`puppo` 四个列标题是值。
`doggo`、`floofer`、`pupper`、`puppo` 这几列已经进行了清洗。
使用 `merge` 函数按照 "tweet_id" 合并三张表。
## 将 twitter_archive_enhanced_clean 表中 "tweet_id" 列的数据类型更改为字符串
# Store tweet_id as a string in the cleaned archive table.
twitter_archive_enhanced_clean["tweet_id"] = twitter_archive_enhanced_clean["tweet_id"].astype(str)
# Use astype to turn the "tweet_id" column of the image predictions table into strings as well.
image_predictions_clean["tweet_id"] = image_predictions_clean["tweet_id"].astype(str)
# Print the column summary of extra_data_clean.
extra_data_clean.info()
RangeIndex: 2352 entries, 0 to 2351
Data columns (total 3 columns):
tweet_id 2352 non-null object
retweet_count 2352 non-null int64
favorite_count 2352 non-null int64
dtypes: int64(2), object(1)
memory usage: 55.2+ KB
# Left-join extra_data_clean onto the archive table by "tweet_id", keeping
# every archive row.
twitter_archive_enhanced_clean = twitter_archive_enhanced_clean.merge(
    extra_data_clean, how="left", on=["tweet_id"]
)
# Inner-join with the image predictions: only tweets present in both tables
# end up in the master table.
twitter_archive_master = twitter_archive_enhanced_clean.merge(
    image_predictions_clean, how="inner", on="tweet_id"
)
# Print the column summary of the merged table.
twitter_archive_master.info()
Int64Index: 1994 entries, 0 to 1993
Data columns (total 19 columns):
tweet_id 1994 non-null object
timestamp 1994 non-null object
text 1994 non-null object
name 1381 non-null object
rating 1981 non-null float64
stage 342 non-null object
retweet_count 1994 non-null int64
favorite_count 1994 non-null int64
jpg_url 1994 non-null object
img_num 1994 non-null int64
p1 1994 non-null object
p1_conf 1994 non-null float64
p1_dog 1994 non-null bool
p2 1994 non-null object
p2_conf 1994 non-null float64
p2_dog 1994 non-null bool
p3 1994 non-null object
p3_conf 1994 non-null float64
p3_dog 1994 non-null bool
dtypes: bool(3), float64(4), int64(3), object(9)
memory usage: 270.7+ KB
# Preview the first five rows of the merged table.
twitter_archive_master.head(5)
tweet_id | timestamp | text | name | rating | stage | retweet_count | favorite_count | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 892420643555336193 | 2017-08-01 16:23:56 +0000 | This is Phineas. He's a mystical boy. Only ever appears in the hole of a donut. 13/10 https://t.co/MgUWQ76dJU | Phineas | 13.0 | NaN | 8842 | 39492 | https://pbs.twimg.com/media/DGKD1-bXoAAIAUK.jpg | 1 | orange | 0.097049 | False | bagel | 0.085851 | False | banana | 0.076110 | False |
1 | 892177421306343426 | 2017-08-01 00:17:27 +0000 | This is Tilly. She's just checking pup on you. Hopes you're doing ok. If not, she's available for pats, snugs, boops, the whole bit. 13/10 https://t.co/0Xxu71qeIV | Tilly | 13.0 | NaN | 6480 | 33786 | https://pbs.twimg.com/media/DGGmoV4XsAAUL6n.jpg | 1 | Chihuahua | 0.323581 | True | Pekinese | 0.090647 | True | papillon | 0.068957 | True |
2 | 891815181378084864 | 2017-07-31 00:18:03 +0000 | This is Archie. He is a rare Norwegian Pouncing Corgo. Lives in the tall grass. You never know when one may strike. 12/10 https://t.co/wUnZnhtVJB | Archie | 12.0 | NaN | 4301 | 25445 | https://pbs.twimg.com/media/DGBdLU1WsAANxJ9.jpg | 1 | Chihuahua | 0.716012 | True | malamute | 0.078253 | True | kelpie | 0.031379 | True |
3 | 891689557279858688 | 2017-07-30 15:58:51 +0000 | This is Darla. She commenced a snooze mid meal. 13/10 happens to the best of us https://t.co/tD36da7qLQ | Darla | 13.0 | NaN | 8925 | 42863 | https://pbs.twimg.com/media/DF_q7IAWsAEuuN8.jpg | 1 | paper_towel | 0.170278 | False | Labrador_retriever | 0.168086 | True | spatula | 0.040836 | False |
4 | 891327558926688256 | 2017-07-29 16:00:24 +0000 | This is Franklin. He would like you to stop calling him "cute." He is a very fierce shark and should be respected as such. 12/10 #BarkWeek https://t.co/AtUZn91f7f | Franklin | 12.0 | NaN | 9721 | 41016 | https://pbs.twimg.com/media/DF6hr6BUMAAzZgT.jpg | 2 | basset | 0.555712 | True | English_springer | 0.225770 | True | German_short-haired_pointer | 0.175219 | True |
# Preview the last five rows of the merged table.
twitter_archive_master.tail(5)
tweet_id | timestamp | text | name | rating | stage | retweet_count | favorite_count | jpg_url | img_num | p1 | p1_conf | p1_dog | p2 | p2_conf | p2_dog | p3 | p3_conf | p3_dog | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
1989 | 666049248165822465 | 2015-11-16 00:24:50 +0000 | Here we have a 1949 1st generation vulpix. Enjoys sweat tea and Fox News. Cannot be phased. 5/10 https://t.co/4B7cOc1EDq | NaN | 5.0 | NaN | 41 | 111 | https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg | 1 | miniature_pinscher | 0.560311 | True | Rottweiler | 0.243682 | True | Doberman | 0.154629 | True |
1990 | 666044226329800704 | 2015-11-16 00:04:52 +0000 | This is a purebred Piers Morgan. Loves to Netflix and chill. Always looks like he forgot to unplug the iron. 6/10 https://t.co/DWnyCjf2mx | NaN | 6.0 | NaN | 147 | 309 | https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg | 1 | Rhodesian_ridgeback | 0.408143 | True | redbone | 0.360687 | True | miniature_pinscher | 0.222752 | True |
1991 | 666033412701032449 | 2015-11-15 23:21:54 +0000 | Here is a very happy pup. Big fan of well-maintained decks. Just look at that tongue. 9/10 would cuddle af https://t.co/y671yMhoiR | NaN | 9.0 | NaN | 47 | 128 | https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg | 1 | German_shepherd | 0.596461 | True | malinois | 0.138584 | True | bloodhound | 0.116197 | True |
1992 | 666029285002620928 | 2015-11-15 23:05:30 +0000 | This is a western brown Mitsubishi terrier. Upset about leaf. Actually 2 dogs here. 7/10 would walk the shit out of https://t.co/r7mOb2m0UI | NaN | 7.0 | NaN | 48 | 132 | https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg | 1 | redbone | 0.506826 | True | miniature_pinscher | 0.074192 | True | Rhodesian_ridgeback | 0.072010 | True |
1993 | 666020888022790149 | 2015-11-15 22:32:08 +0000 | Here we have a Japanese Irish Setter. Lost eye in Vietnam (?). Big fan of relaxing on stair. 8/10 would pet https://t.co/BLDqew2Ijj | NaN | 8.0 | NaN | 530 | 2528 | https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg | 1 | Welsh_springer_spaniel | 0.465074 | True | collie | 0.156665 | True | Shetland_sheepdog | 0.061428 | True |
`twitter_archive_enhanced` 表中有 "tweet_id"、"timestamp" 两列数据类型错误;出于需要,"tweet_id" 列的数据类型已在前面转换。
利用 `pd.to_datetime` 将 "timestamp" 列转换为日期时间类型。
# Parse "timestamp" into pandas datetimes. The stored values look like
# "2017-08-01 16:23:56 +0000", which the former format="%Y-%m-%d" does not
# describe (it covers only the date part), so the format is left for pandas
# to infer instead of passing one that does not match the data.
twitter_archive_master["timestamp"] = pd.to_datetime(twitter_archive_master["timestamp"])
#通过to_csv存储为 twitter_archive_master.csv
twitter_archive_master.to_csv("twitter_archive_master.csv",index=False)
twitter_master = pd.read_csv("twitter_archive_master.csv")# 读取清洗后整合的csv文件
% matplotlib inline
import matplotlib.pyplot as plt
# Bar chart of the 25 most frequent dog names, with each name's share of all
# recorded names drawn as a red line on a secondary (right-hand) y axis.
plt.figure()
name_counts = twitter_master["name"].value_counts()  # computed once, reused below
top_name_counts = name_counts.head(25)
# Share = occurrences of the name / total number of non-missing names.
# (Dividing by the number of *distinct* names, as before, is not a proportion.)
top_name_share = top_name_counts / name_counts.sum()
ax1 = top_name_counts.plot(kind="bar", figsize=(12, 5), color="#C0C0C0", legend=True, label="Number of dog's name")
# secondary_y=True puts the share on its own right-hand axis
ax2 = top_name_share.plot(secondary_y=True, legend=True, label="Ratio of dog's name", mark_right=False, style='r')
plt.title("Dog's name for TOP 25")
ax1.set_ylabel("Number of dog's name")
ax2.set_ylabel("Ratio of dog's name")
plt.gcf().autofmt_xdate()  # slant the x tick labels so the names do not overlap
% matplotlib inline
from wordcloud import WordCloud
from PIL import Image
from os import path
# Word cloud of all recorded dog names, shaped by the image in "timg.jpg".
name = twitter_master["name"].dropna()  # rows without a name are skipped
dog_mask = np.array(Image.open("timg.jpg"))  # image pixels used as the cloud mask
wc = WordCloud(mask=dog_mask, max_words=2075, background_color="white")
all_names_text = ' '.join(name)  # one space-separated string of every name
wc.generate(all_names_text)

plt.figure(figsize=(12, 6))
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")  # hide axes and ticks around the image
plt.show()
# Summary statistics of the "rating" column (count, mean, std, quartiles, extremes).
twitter_master["rating"].describe()
count 1981.000000
mean 11.642797
std 40.772727
min 0.000000
25% 10.000000
50% 11.000000
75% 12.000000
max 1776.000000
Name: rating, dtype: float64
# Remove rating outliers with the 1.5 * IQR rule: keep only rows whose rating
# lies strictly between Q1 - 1.5*IQR and Q3 + 1.5*IQR.
# The quartiles are computed from the data rather than copied by hand from a
# describe() printout, so the bounds stay correct if the data changes.
Q3 = twitter_master["rating"].quantile(0.75)
Q1 = twitter_master["rating"].quantile(0.25)
IQR = Q3 - Q1
Max = Q3 + 1.5 * IQR  # upper bound (exclusive)
Min = Q1 - 1.5 * IQR  # lower bound (exclusive)
within_bounds = (twitter_master["rating"] < Max) & (twitter_master["rating"] > Min)
# .copy() gives an independent frame, so later column assignments on
# twitter_master do not operate on a slice of the unfiltered data.
twitter_master = twitter_master[within_bounds].copy()
twitter_master["rating"].describe()
count 1813.000000
mean 11.036338
std 1.437450
min 7.500000
25% 10.000000
50% 11.000000
75% 12.000000
max 14.000000
Name: rating, dtype: float64
import seaborn as sns
% matplotlib inline
# Distribution of ratings: an 8-bin histogram with KDE, a filled red KDE curve
# and a rug plot, all drawn on the same figure.
rating_values = twitter_master["rating"].dropna()  # missing ratings are left out
plt.figure(figsize=(9, 5))
sns.distplot(rating_values, bins=8, hist=True, kde=True)
sns.kdeplot(rating_values, color='r', shade=True)
sns.rugplot(rating_values)
plt.title("Histogram and density diagram for rating")
plt.show()
import matplotlib.dates as mdate
import matplotlib.dates as mdates
from matplotlib.ticker import FormatStrFormatter
% matplotlib inline
# Three stacked line plots (rating, favorite count, retweet count) against the
# tweet timestamp, sharing one x axis.

# Converts a date string to Matplotlib's float date number.
# datestr2num is used because strpdate2num is no longer available in
# current Matplotlib releases.
date2num = mdates.datestr2num
# Parse the timestamps read back from the CSV. No fixed `format` is passed:
# the stored strings presumably include a time-of-day part, which the former
# "%Y-%m-%d" pattern does not cover.
twitter_master["timestamp"] = pd.to_datetime(twitter_master["timestamp"])
fig, axes = plt.subplots(3, 1, sharex=True, sharey=False)  # three panels, shared x axis
axes[0].set_xlim(date2num('2016-07-01'), date2num('2017-08-01'))  # visible date range
plt.gca().xaxis.set_major_locator(mdates.MonthLocator())  # one major tick per month
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%Y-%m'))  # tick label format
# One metric per panel.
axes[0].plot(twitter_master["timestamp"], twitter_master['rating'], color="#1F77B4")
axes[1].plot(twitter_master["timestamp"], twitter_master['favorite_count'], color="#FF7F0E")
axes[2].plot(twitter_master["timestamp"], twitter_master['retweet_count'], color="#2CA02C")
plt.xticks(rotation=45)  # rotate the date labels by 45 degrees
axes[0].set_title("rating with time")
axes[1].set_title("favorite count with time")
axes[2].set_title("retweet count with time")
plt.xlabel("date")
axes[0].set_ylabel("rating")
axes[1].set_ylabel("favorite count")
axes[2].set_ylabel("retweet count")
plt.subplots_adjust(right=2, top=1.5, hspace=0.2)
plt.grid(False)
plt.show()
% matplotlib inline
# Scatter plot of favorite count against rating, one point per row.
# The points carry a label so that plt.legend() has an entry to show; without
# any labelled artist the legend is empty and Matplotlib emits a warning.
plt.scatter(twitter_master['rating'], twitter_master['favorite_count'], alpha=0.5, c="#17BECF", label="tweet")
plt.title("rating and favorite count")
plt.xlabel("rating")
plt.ylabel("favorite count")
plt.legend(loc='upper left')
plt.subplots_adjust(right=1.5, top=1)
plt.show()
关于狗的rating部分
http://discussions.youdaxue.com/t/rating/58299
[助教分享]for 循环和 apply 函数不要混用(Pandas 中如何遍历数据集)
http://discussions.youdaxue.com/t/for-apply-pandas/64971
推特图像预测数据【整洁度问题】
http://discussions.youdaxue.com/t/topic/61725/7
python正则表达式分组
https://blog.csdn.net/qq_42739440/article/details/81117919
python正则表达式详解
https://www.cnblogs.com/dyfblog/p/5880728.html
项目- 清洗与分析数据,筛选评分结果不正确
http://discussions.youdaxue.com/t/topic/65586
狗的评分问题(rating)
http://discussions.youdaxue.com/t/rating/61554/4
Python: Extract numbers from a string
https://stackoverflow.com/questions/4289331/python-extract-numbers-from-a-string
Pandas–DataFrame修改值
https://blog.csdn.net/qq_33711966/article/details/79902276
6 种 Python 数据可视化工具
http://python.jobbole.com/85601/
搞懂箱形图分析,快速识别异常值!
https://blog.csdn.net/eric_e/article/details/80944744
【特征工程】一种异常值检测方法、原理、代码实现 (基于箱线图)
https://blog.csdn.net/sscc_learning/article/details/78771324
matplotlib命令与格式
https://blog.csdn.net/helunqu2017/article/details/78736686
日期格式
https://blog.csdn.net/belldeep/article/details/78170274
matplotlib官方文档
https://matplotlib.org/gallery/lines_bars_and_markers/spectrum_demo.html#sphx-glr-gallery-lines-bars-and-markers-spectrum-demo-py