Hello, I am getting error loading dependencies error when running this code. This is supposed to be a webscraper where the user inputs the search term and amount of text to scrape.
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import dash_table
import pandas as pd
import praw #for Reddit threads
from psaw import PushshiftAPI #for Reddit comments
import twint #for tweets
import nest_asyncio
nest_asyncio.apply()
app = dash.Dash()
app.scripts.config.serve_locally = True
app.css.config.serve_locally = True
app.config['suppress_callback_exceptions'] = True
app.layout = html.Div([
html.Div([
html.Div([
dcc.Input(
id = "ScreenName_Input",
placeholder='Enter a word',
type='text',
value=''
),
dcc.Input(
id = "numby",
placeholder='Enter a number',
type='number',
value=''
)
],style={'width': '48%', 'float': 'right', 'display': 'inline-block'})
]),
html.Button(id='screenNames_submit_button', children='Submit'),
dash_table.DataTable(
id='tweet_table',
columns=[
{'name': 'Date', 'id': 'column1'},
{'name': 'Score', 'id': 'column2'},
{'name': 'Text', 'id': 'column3'},
{'name': 'Upvote Ratio', 'id': 'column4'},
{'name': 'Type', 'id': 'column5'},
{'name': 'Likes', 'id': 'column6'},
{'name': 'Retweets', 'id': 'column7'},
],
data=[])
])
@app.callback(
Output(component_id='tweet_table', component_property='data'),
[Input(component_id='screenNames_submit_button', component_property='n_clicks_timestamp')],
[State(component_id='ScreenName_Input', component_property='value')],
[State(component_id='numby', component_property='value')]
)
def ctoscraper(button, text, number):
#Part 1: for Reddit threads
reddit = praw.Reddit(client_id='x6e1LTj2OQnGYw',
client_secret='cKPjQfnskfy1w5IlwLi6Aos-DMU',
user_agent='trialnew',
username='opposity',
password='redacted')
all = reddit.subreddit("technology+tech+futurology+engineering+army+navy+airforce+geek+military+scifi+science")
topics_dict = { "Text":[], "Date":[],"Score":[],"Upvote_Ratio":[]}
for submission in all.search(text, limit = number):
topics_dict["Text"].append(submission.title)
topics_dict["Date"].append(submission.created_utc)
topics_dict["Score"].append(submission.score)
topics_dict["Upvote_Ratio"].append(submission.upvote_ratio)
topics_data = pd.DataFrame(topics_dict)
topics_data['Date'] = (pd.to_datetime(topics_data['Date'], unit='s'))
listeforlabel = ['Reddit Thread'] * number
dflisteforlabel = pd.Series(listeforlabel)
upnewdf = pd.concat([topics_data, dflisteforlabel], axis=1)
#Part 2: for Reddit comments
subbies = ["technology","tech","futurology","engineering","army","navy","airforce","geek","military","scifi","science"]
api = PushshiftAPI()
gen = api.search_comments(q = text, subreddit = subbies)
cache = []
for c in gen:
cache.append(c)
if len(cache) >= number:
break
comments_dict = { "Text":[], "Date":[]}
for x in cache:
comments_dict["Text"].append(str(x[14]))
comments_dict["Date"].append(x[16])
commentdf = pd.DataFrame(comments_dict)
commentdf['Date'] = (pd.to_datetime(commentdf['Date'], unit='s', errors = "coerce"))
listforlabel = ['Reddit Comment'] * number
dflistforlabel = pd.Series(listforlabel)
newdf = pd.concat([commentdf, dflistforlabel], axis=1)
#Part 3: for Tweets
c = twint.Config()
c.Search = text
c.Limit = number
c.Pandas = True
#c.Since
#c.Until
twint.run.Search(c)
Tweets_df = twint.storage.panda.Tweets_df
necessary_text = Tweets_df.tweet
necessary_date = Tweets_df.date
necessary_likes = Tweets_df.nlikes
necessary_retweets = Tweets_df.nretweets
tweetlabel = ['Tweet'] * len(necessary_text)
tweforlabel = pd.Series(tweetlabel)
lastlabel = pd.concat([necessary_text, necessary_date], axis=1)
finallabel = pd.concat([lastlabel,tweforlabel], axis = 1)
finallabel.rename(columns = {"tweet":"Text","date":"Date"}, inplace = True)
final2label = pd.concat([finallabel, necessary_likes], axis=1)
final3label = pd.concat([final2label, necessary_retweets], axis=1)
#Combine Part 1, Part 2, Part 3
frames = [upnewdf, newdf,final3label]
result = pd.concat(frames)
result.columns = ["Date","Score","Text", "Upvote Ratio", "Type","Likes","Retweets"]
return result.to_dict(orient = 'records')
if __name__ == '__main__':
app.run_server(debug=True)