Skip to content
Permalink
master
Switch branches/tags

Name already in use

A tag already exists with the provided branch name. Many Git commands accept both tag and branch names, so creating this branch may cause unexpected behavior. Are you sure you want to create this branch?
Go to file
 
 
Cannot retrieve contributors at this time
"""Created with the help of sentdex's video tutorial: https://www.youtube.com/watch?time_continue=1&v=tw8G8-KMRf8"""
import sqlite3
import pandas as pd
timeframes = ["2015-01"]


def _append_lines(path, values):
    """Append every item of *values* to *path* as one UTF-8 encoded line."""
    with open(path, "a", encoding="utf8") as f:
        f.writelines("{}\n".format(value) for value in values)


def export_timeframe(timeframe, limit=10000):
    """Export parent/reply pairs of one timeframe database to text files.

    Rows are read from the ``parent_reply`` table of ``<timeframe>.db`` in
    batches of *limit*, ordered by ``unix``, keeping only rows whose
    ``parent`` is not NULL and whose ``score`` is positive.  The first batch
    is appended to ``test.from`` / ``test.to``; every later batch is appended
    to ``train.from`` / ``train.to``.  Line *i* of a ``.from`` file and line
    *i* of the matching ``.to`` file belong to the same row.

    Args:
        timeframe: Database name without the ``.db`` suffix.
        limit: Maximum number of rows fetched per batch.

    Returns:
        The total number of rows written.
    """
    connection = sqlite3.connect("{}.db".format(timeframe))
    last_unix = 0
    counter = 0
    rows_written = 0
    test_done = False
    try:
        while True:
            # NOTE: paging by "unix > last_unix" skips rows that share the
            # timestamp of the last row of the previous batch.
            df = pd.read_sql(
                "SELECT * FROM parent_reply WHERE unix > {} AND parent NOT NULL "
                "AND score >0 ORDER BY unix ASC LIMIT {}".format(last_unix, limit),
                connection,
            )
            # An empty batch occurs when the row count is an exact multiple
            # of `limit` (or the table has no matching rows); there is no
            # last timestamp to read, so stop here instead of raising.
            if df.empty:
                break
            last_unix = df["unix"].iloc[-1]

            prefix = "train" if test_done else "test"
            _append_lines(prefix + ".from", df["parent"].values)
            # The reply text goes to the ".to" file; writing the parent again
            # would make both files identical.
            # NOTE(review): the reply column is assumed to be named `comment`,
            # as in the referenced tutorial's schema -- confirm against the DB.
            _append_lines(prefix + ".to", df["comment"].values)
            test_done = True

            rows_written += len(df)
            counter += 1
            if counter % 20 == 0:
                print(counter*limit,"rows completed so far")

            # A short batch means the table is exhausted.
            if len(df) != limit:
                break
    finally:
        connection.close()
    return rows_written


def main():
    """Export every database listed in ``timeframes``."""
    for timeframe in timeframes:
        export_timeframe(timeframe)


if __name__ == "__main__":
    main()