KindEditor
1. Official site: http://kindeditor.net/doc.php
2. Folder layout:
├── asp                      ASP examples
├── asp.net                  ASP.NET examples
├── attached                 empty folder for attached files
├── examples                 HTML examples
├── jsp                      Java (JSP) examples
├── kindeditor-all-min.js    full JS (minified)
├── kindeditor-all.js        full JS (unminified)
├── kindeditor-min.js        KindEditor core only (minified)
├── kindeditor.js            KindEditor core only (unminified)
├── lang                     language packs
├── license.txt              license
├── php                      PHP examples
├── plugins                  plugins used internally by KindEditor
└── themes                   KindEditor themes
3. Basic usage: the minimal setup is a textarea plus a KindEditor.create call.
<textarea id="content" name="content"></textarea>
<script>KindEditor.create('#content');</script>
4. Full list of configuration options:
http://kindeditor.net/doc3.php?cmd=config
5. Comment box example:
import os
import json

from django.http import HttpResponse


def upload_img(request):
    upload_type = request.GET.get('dir')   # type of the upload (image/file/media...)
    file_obj = request.FILES.get('fafafa')
    file_path = os.path.join('static/img', file_obj.name)
    with open(file_path, 'wb') as f:
        for chunk in file_obj.chunks():
            f.write(chunk)
    # return the structure the editor understands (the path where the image was saved)
    dic = {
        'error': 0,
        'url': '/' + file_path,
        'message': 'something went wrong...'
    }
    return HttpResponse(json.dumps(dic))
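On success KindEditor expects 'error' to be 0 together with a 'url' it can insert and preview; to signal failure, return 'error': 1 and the editor displays 'message' instead.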
6. Notes on using KindEditor
When posting a comment, prefer a plain form submit: the page reloads automatically and the comment thread is updated.
When submitting via AJAX, KindEditor needs extra configuration, and you also have to reload the page yourself in the AJAX callback (e.g. assign location.href or call location.reload()).
When KindEditor decorates a textarea:
On a form submit, the form automatically picks up the textarea's value from KindEditor. But when the data is submitted with jQuery, you need to add

KindEditor.create('', {
    afterBlur: function () { this.sync(); }
})

so that when the editor loses focus, a callback runs that syncs the editor's content back into the textarea.
Typical scenarios: adding a new post, posting comments.
How uploads work internally: the plugin generates an iframe tag and submits the image with pseudo-AJAX.

Front end:

<script src="/static/kindeditor-4.1.10/kindeditor-all.js"></script>
KindEditor.create('#i1', {
    filePostName: 'fafafa',            // name of the uploaded-file field
    uploadJson: '/upload_img.html',    // URL the file is posted to
    extraFileUploadParams: {           // extra params carried with the upload (CSRF for the pseudo-AJAX request)
        'csrfmiddlewaretoken': '{{ csrf_token }}'
    }
})

Back end:

request.GET.get('dir')   # type of the uploaded file
dic = {                  # the structure KindEditor understands (enables preview)
    'error': 0,
    'url': '/' + file_path,
    'message': 'error message'
}
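For completeness, a minimal sketch of the URL wiring this assumes (Django 1.x-style urls.py; the app name "app01" is hypothetical):

# urls.py
from django.conf.urls import url

from app01 import views  # "app01" is a placeholder app name

urlpatterns = [
    url(r'^upload_img.html$', views.upload_img),
]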
7. XSS: filtering dangerous tags

BeautifulSoup is a module that accepts an HTML or XML string and parses it; you can then use the methods it provides to find a given element quickly, which makes locating elements in HTML or XML simple.

Dependency:

pip3 install beautifulsoup4

Usage example:
from bs4 import BeautifulSoup

html_doc = """
<html><head><title>The Dormouse's story</title></head>
<body>
    ...
</body>
</html>
"""

soup = BeautifulSoup(html_doc, features="lxml")
1. name: the tag's name

# tag = soup.find('a')
# name = tag.name    # get
# print(name)
# tag.name = 'span'  # set
# print(soup)
2. attrs: the tag's attributes

# tag = soup.find('a')
# attrs = tag.attrs            # get
# print(attrs)
# tag.attrs = {'ik': 123}      # set (replace all attributes)
# tag.attrs['id'] = 'iiiii'    # set a single attribute
# print(soup)
3. children: all direct child nodes

# body = soup.find('body')
# v = body.children
4. descendants: all descendant nodes (children, grandchildren, ...)

# body = soup.find('body')
# v = body.descendants
5. clear: empty out all of a tag's children (the tag itself is kept)

# tag = soup.find('body')
# tag.clear()
# print(soup)
6. decompose: recursively remove the tag and everything inside it

# body = soup.find('body')
# body.decompose()
# print(soup)
7. extract: recursively remove the tag and everything inside it, returning what was removed

# body = soup.find('body')
# v = body.extract()
# print(soup)
8. decode: convert to a string (including the current tag); decode_contents: the same, excluding the current tag

# body = soup.find('body')
# v = body.decode()
# v = body.decode_contents()
# print(v)
9. encode: convert to bytes (including the current tag); encode_contents: the same, excluding the current tag

# body = soup.find('body')
# v = body.encode()
# v = body.encode_contents()
# print(v)
10. find: get the first matching tag

# tag = soup.find('a')
# print(tag)
# tag = soup.find(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
# tag = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
# print(tag)
11. find_all: get all matching tags

# tags = soup.find_all('a')
# print(tags)

# tags = soup.find_all('a', limit=1)
# print(tags)

# tags = soup.find_all(name='a', attrs={'class': 'sister'}, recursive=True, text='Lacie')
# tags = soup.find(name='a', class_='sister', recursive=True, text='Lacie')
# print(tags)

# ####### lists #######
# v = soup.find_all(name=['a', 'div'])
# print(v)

# v = soup.find_all(class_=['sister0', 'sister'])
# print(v)

# v = soup.find_all(text=['Tillie'])
# print(v, type(v[0]))

# v = soup.find_all(id=['link1', 'link2'])
# print(v)

# v = soup.find_all(href=['link1', 'link2'])
# print(v)

# ####### regular expressions #######
import re
# rep = re.compile('p')
# rep = re.compile('^p')
# v = soup.find_all(name=rep)
# print(v)

# rep = re.compile('sister.*')
# v = soup.find_all(class_=rep)
# print(v)

# rep = re.compile('http://www.oldboy.com/static/.*')
# v = soup.find_all(href=rep)
# print(v)

# ####### filter functions #######
# def func(tag):
#     return tag.has_attr('class') and tag.has_attr('id')
# v = soup.find_all(name=func)
# print(v)

# ## get: fetch a single tag attribute
# tag = soup.find('a')
# v = tag.get('id')
# print(v)
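Because html_doc above is elided, here is a self-contained miniature you can actually run to see find_all behave (the tiny document is our own, not from the original post):

from bs4 import BeautifulSoup

doc = '<div><a id="link1" class="sister" href="/a">one</a><a id="link2" href="/b">two</a></div>'
s = BeautifulSoup(doc, 'html.parser')

print(s.find_all('a'))                    # both anchors
print(s.find_all('a', limit=1))           # just the first one
print(s.find_all(id=['link1', 'link2']))  # match by a list of attribute values
print(s.find_all(class_='sister'))        # match by CSS class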
12. has_attr: check whether the tag has a given attribute

# tag = soup.find('a')
# v = tag.has_attr('id')
# print(v)
13. get_text: get the text inside the tag

# tag = soup.find('a')
# v = tag.get_text()
# print(v)
14. index: find a tag's index position within another tag

# tag = soup.find('body')
# v = tag.index(tag.find('div'))
# print(v)

# tag = soup.find('body')
# for i, v in enumerate(tag):
#     print(i, v)
15. is_empty_element: whether the tag is an empty (void) or self-closing element,
i.e. one of: 'br', 'hr', 'input', 'img', 'meta', 'spacer', 'link', 'frame', 'base'

# tag = soup.find('br')
# v = tag.is_empty_element
# print(v)
16. Related nodes of the current tag

# soup.next
# soup.next_element
# soup.next_elements
# soup.next_sibling
# soup.next_siblings

# tag.previous
# tag.previous_element
# tag.previous_elements
# tag.previous_sibling
# tag.previous_siblings

# tag.parent
# tag.parents
17. Searching from a tag's related nodes

# tag.find_next(...)
# tag.find_all_next(...)
# tag.find_next_sibling(...)
# tag.find_next_siblings(...)

# tag.find_previous(...)
# tag.find_all_previous(...)
# tag.find_previous_sibling(...)
# tag.find_previous_siblings(...)

# tag.find_parent(...)
# tag.find_parents(...)

# same parameters as find_all
18. select, select_one: CSS selectors

soup.select("title")
soup.select("p:nth-of-type(3)")
soup.select("body a")
soup.select("html head title")
tag = soup.select("span,a")
soup.select("head > title")
soup.select("p > a")
soup.select("p > a:nth-of-type(2)")
soup.select("p > #link1")
soup.select("body > a")
soup.select("#link1 ~ .sister")
soup.select("#link1 + .sister")
soup.select(".sister")
soup.select("[class~=sister]")
soup.select("#link1")
soup.select("a#link2")
soup.select('a[href]')
soup.select('a[href="http://example.com/elsie"]')
soup.select('a[href^="http://example.com/"]')
soup.select('a[href$="tillie"]')
soup.select('a[href*=".com/el"]')

from bs4.element import Tag

def default_candidate_generator(tag):
    for child in tag.descendants:
        if not isinstance(child, Tag):
            continue
        if not child.has_attr('href'):
            continue
        yield child

tags = soup.find('body').select("a", _candidate_generator=default_candidate_generator)
print(type(tags), tags)

tags = soup.find('body').select("a", _candidate_generator=default_candidate_generator, limit=1)
print(type(tags), tags)
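Likewise, a runnable miniature for the selectors above (the document is our own):

from bs4 import BeautifulSoup

doc = '<p id="p1"><a class="sister" href="http://example.com/elsie">Elsie</a></p>'
s = BeautifulSoup(doc, 'html.parser')

print(s.select("p > a"))                            # child combinator
print(s.select(".sister"))                          # class selector
print(s.select('a[href^="http://example.com/"]'))   # attribute-prefix match
print(s.select_one("#p1"))                          # select_one returns a single tag, not a list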
19. Tag contents

# tag = soup.find('span')
# print(tag.string)           # get
# tag.string = 'new content'  # set
# print(soup)

# tag = soup.find('body')
# print(tag.string)
# tag.string = 'xxx'
# print(soup)

# tag = soup.find('body')
# v = tag.stripped_strings    # recursively yields the text of all inner tags (a generator)
# print(v)
20. append: append a tag inside the current tag

# tag = soup.find('body')
# tag.append(soup.find('a'))
# print(soup)

# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('body')
# tag.append(obj)
# print(soup)
21. insert: insert a tag at a given position inside the current tag

# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('body')
# tag.insert(2, obj)
# print(soup)
22. insert_after, insert_before: insert after or before the current tag

# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('body')
# # tag.insert_before(obj)
# tag.insert_after(obj)
# print(soup)
23. replace_with: replace the current tag with the given tag

# from bs4.element import Tag
# obj = Tag(name='i', attrs={'id': 'it'})
# obj.string = 'I am new here'
# tag = soup.find('div')
# tag.replace_with(obj)
# print(soup)
24. Creating relationships between tags

# tag = soup.find('div')
# a = soup.find('a')
# tag.setup(previous_sibling=a)
# print(tag.previous_sibling)
25. wrap: wrap the current tag inside the given tag

# from bs4.element import Tag
# obj1 = Tag(name='div', attrs={'id': 'it'})
# obj1.string = 'I am new here'
# tag = soup.find('a')
# v = tag.wrap(obj1)
# print(soup)

# tag = soup.find('a')
# v = tag.wrap(soup.find('p'))
# print(soup)
26. unwrap: remove the current tag but keep everything it wraps

# tag = soup.find('a')
# v = tag.unwrap()
# print(soup)
Back-end filtering:
from bs4 import BeautifulSoup


def xss(content):
    # whitelist: allowed tags mapped to their allowed attributes
    valid_tag = {
        'p': ['class', 'id'],
        'img': ['href', 'alt', 'src'],
        'div': ['class']
    }

    soup = BeautifulSoup(content, 'html.parser')

    tags = soup.find_all()
    for tag in tags:
        if tag.name not in valid_tag:
            tag.decompose()
            continue  # a removed tag has no whitelist entry to check
        if tag.attrs:
            for k in list(tag.attrs.keys()):
                if k not in valid_tag[tag.name]:
                    del tag.attrs[k]

    content_str = soup.decode()
    return content_str
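A quick check of the filter (the sample input below is illustrative, not from the original post):

dirty = '<p class="c1" onclick="steal()">hello<script>alert(1)</script></p>'
print(xss(dirty))
# -> <p class="c1">hello</p>   (the script tag is decomposed, onclick is stripped)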
Example of a singleton implemented with __new__:
from bs4 import BeautifulSoup


class XSSFilter(object):
    __instance = None

    def __init__(self):
        # XSS whitelist: allowed tags mapped to their allowed attributes
        self.valid_tags = {
            "font": ['color', 'size', 'face', 'style'],
            'b': [],
            'div': [],
            "span": [],
            "table": ['border', 'cellspacing', 'cellpadding'],
            'th': ['colspan', 'rowspan'],
            'td': ['colspan', 'rowspan'],
            "a": ['href', 'target', 'name'],
            "img": ['src', 'alt', 'title'],
            'p': ['align'],
            "pre": ['class'],
            "hr": ['class'],
            'strong': []
        }

    def __new__(cls, *args, **kwargs):
        """
        Singleton: every instantiation returns the same object.
        """
        if not cls.__instance:
            obj = object.__new__(cls)
            cls.__instance = obj
        return cls.__instance

    def process(self, content):
        soup = BeautifulSoup(content, 'lxml')
        # walk every tag in the document
        for tag in soup.find_all(recursive=True):
            # hide and empty any tag that is not whitelisted
            if tag.name not in self.valid_tags:
                tag.hidden = True
                if tag.name not in ['html', 'body']:
                    tag.hidden = True
                    tag.clear()
                continue
            # strip attributes that are not whitelisted for this tag
            attr_rules = self.valid_tags[tag.name]
            keys = list(tag.attrs.keys())
            for key in keys:
                if key not in attr_rules:
                    del tag[key]

        return soup.renderContents()


if __name__ == '__main__':
    # sample document (the markup here is illustrative)
    html = """<p class="title">The Dormouse's story</p>
    <p class="story" onclick="evil()">Once upon a time there were three little sisters; and their names were
    <a href="http://example.com/lacie" class="sister" id="link2">Lacie</a> and
    <a href="http://example.com/tillie" class="sister" id="link3">Tilffffffffffffflie</a>;
    and they lived at the bottom of a well.</p>
    ..."""

    obj = XSSFilter()
    v = obj.process(html)
    print(v)