Differences

This shows you the differences between two versions of the page.

Link to this comparison view

Both sides previous revision Previous revision
Last revision Both sides next revision
links [2018/11/22 01:56] yair links [2018/11/22 02:03] yair
Line 4487: Line 4487:
   - run    - run 
 <code python> <code python>
 +# grab all links we talked about
 from bs4 import BeautifulSoup from bs4 import BeautifulSoup
 import requests import requests
 +import re
  
-#url = requests.get('https://redditmetrics.com/top') +#use 'python http.server' on the directory to serve the folder, bs4 doesn't do local links 
-inbox_path='http://localhost:8000/ElBootkeh_64b09d1ba5/message.html'+inbox_path='http://localhost:8000/messages/inbox/ElBootkeh_64b09d1ba5/message.html'
 print(inbox_path) print(inbox_path)
 url = requests.get(inbox_path) url = requests.get(inbox_path)
- 
 soup = BeautifulSoup(url.text, 'html.parser') soup = BeautifulSoup(url.text, 'html.parser')
- 
- 
  
 with open('idiot_links.txt', 'w') as f: with open('idiot_links.txt', 'w') as f:
-    for link in soup.find_all('a'):+    for link in soup.find_all(href=True):
         try:         try:
-            print(link.string)+            #grab href see> https://stackoverflow.com/q/5815747/184085 
-            f.write(link.string.lstrip() + '\n\n')+            link=link['href']
 +            print(link)
 +            # afterwards pass .html\?.*$ into .html (in vscode to format long aliexpress links...) 
 +            link=re.sub(r'.html\?.*$',r'.html',link) 
 +            #TBD remove stickers messages with '^messages/stickers_used.*png' 
 +            #TBD shorten deep dirs 
 +            #TBD remove dups 'awk '!seen[$0]++' idiot_links.txt > idiot_linksU.txt' 
 +            #TBD reverse order 
 +            #TBD add {{curly brackets}} to media 
 +            #TBD extract date of event 
 +            #TBD auto upload relevant files to wiki 
 +            f.write(link.lstrip() + '\n\n')
         except:         except:
             TypeError             TypeError
 +
 +
 </code> </code>
    
 * bonus, if you have long ass aliexpress links that you want to "cut" after the .html? just regexp them like so > {{2018-11-20-235041_1366x768_scrot.png?linkonly|image}} * bonus, if you have long ass aliexpress links that you want to "cut" after the .html? just regexp them like so > {{2018-11-20-235041_1366x768_scrot.png?linkonly|image}}