Differences
This shows you the differences between two versions of the page.
Both sides previous revision Previous revision | Last revision Both sides next revision | ||
links [2018/11/22 01:56] – yair | links [2018/11/22 02:03] – yair | ||
---|---|---|---|
Line 4487: | Line 4487: | ||
- run | - run | ||
<code python> | <code python> | ||
+ | # grab all links we talked about | ||
from bs4 import BeautifulSoup | from bs4 import BeautifulSoup | ||
import requests | import requests | ||
+ | import re | ||
- | #url = requests.get('https:// | + | #use ' |
- | inbox_path=' | + | inbox_path=' |
print(inbox_path) | print(inbox_path) | ||
url = requests.get(inbox_path) | url = requests.get(inbox_path) | ||
- | |||
soup = BeautifulSoup(url.text, | soup = BeautifulSoup(url.text, | ||
- | |||
- | |||
with open(' | with open(' | ||
- | for link in soup.find_all(' | + | for link in soup.find_all(href=True): |
try: | try: | ||
- | print(link.string) | + | |
- | f.write(link.string.lstrip() + ' | + | link=link[' |
+ | | ||
+ | # afterwards pass .html\?.*$ into .html (in vscode to format long aliexpress links...) | ||
+ | link=re.sub(r' | ||
+ | #TBD remove stickers messages with ' | ||
+ | #TBD shorten deep dirs | ||
+ | #TBD remove dups 'awk ' | ||
+ | #TBD reverse order | ||
+ | #TBD add {{curly brackets}} to media | ||
+ | #TBD extract date of event | ||
+ | #TBD auto upload relevant files to wiki | ||
+ | f.write(link.lstrip() + ' | ||
except: | except: | ||
TypeError | TypeError | ||
+ | |||
+ | |||
</ | </ | ||
* bonus, if you have long ass aliexpress links that you want to " | * bonus, if you have long ass aliexpress links that you want to " | ||